ó
    <±h¤J ã                   ó–  • S r SSKrSSKJr  SSKJrJrJr  SSKrSSKJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJrJr  SSKJr  SSKJrJrJr  SSKJrJrJr  SSKJr  \R>                  " \ 5      r!   SmS\	RD                  S\RF                  S\RF                  S\RF                  S\\RF                     S\\$   S\$S\\RF                     4S jjr% " S S\	RD                  5      r& " S S\	RD                  5      r'   SnS\RF                  S\$S\\(   S \)S!\*4
S" jjr+  SoS\RF                  S#\\(\*4   S\\(   S!\*4S$ jjr, " S% S&\	RD                  5      r- " S' S(\	RD                  5      r. " S) S*\	RD                  5      r/\ " S+ S,\5      5       r0 " S- S.\	RD                  5      r1 " S/ S0\	RD                  5      r2 " S1 S2\05      r3\\" S3S49 " S5 S6\5      5       5       r4\\" S7S49 " S8 S9\5      5       5       r5\\" S:S49 " S; S<\5      5       5       r6\\" S=S49 " S> S?\5      5       5       r7\\" S@S49 " SA SB\5      5       5       r8\\" SCS49 " SD SE\5      5       5       r9SF\Rt                  Rv                  SG\RF                  SH\RF                  4SI jr<SpSJ\RF                  SK\\RF                     SH\RF                  4SL jjr= " SM SN\	RD                  5      r> " SO SP\	RD                  5      r? " SQ SR\	RD                  5      r@ " SS ST\	RD                  5      rA\ " SU SV\05      5       rB " SW SX\	RD                  5      rC\" SYS49 " SZ S[\05      5       rD " S\ S]\	RD                  5      rE\" S^S49 " S_ S`\05      5       rF\" SaS49 " Sb Sc\	RD                  5      5       rG\" SdS49 " Se Sf\05      5       rH " Sg Sh\	RD                  5      rI\" SiS49 " Sj Sk\05      5       rJ/ SlQrKg)qzPyTorch PatchTST model.é    N)Ú	dataclass)ÚCallableÚOptionalÚUnion)Únné   )ÚACT2CLS)ÚFlashAttentionKwargs)ÚBaseModelOutput)ÚALL_ATTENTION_FUNCTIONSÚPreTrainedModel)ÚUnpack)ÚNegativeBinomialOutputÚNormalOutputÚStudentTOutput)ÚModelOutputÚauto_docstringÚloggingé   )ÚPatchTSTConfigÚmoduleÚqueryÚkeyÚvalueÚattention_maskÚscalingÚdropoutÚ	head_maskc                 óÀ  • Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  n	Ub  X”-   n	[        R
                  R                  U	SS9n	Ub  X—R                  SSSS5      -  n	[        R
                  R                  X–U R                  S9n	[        R                  " X“5      n
U
R                  SS5      R                  5       n
X©4$ )Néÿÿÿÿç      à¿é   r   ©Údimr   )ÚpÚtraining)ÚsizeÚtorchÚmatmulÚ	transposer   Ú
functionalÚsoftmaxÚviewr   r&   Ú
contiguous)r   r   r   r   r   r   r   r   ÚkwargsÚattn_weightsÚattn_outputs              Úf/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/patchtst/modeling_patchtst.pyÚeager_attention_forwardr3   &   sÌ   € ð Ø—*‘*˜R“. DÑ(ˆä—<’< §}¡}°Q¸Ó':Ó;¸gÑE€LØÑ!Ø#Ñ4ˆä—=‘=×(Ñ(¨¸2Ð(Ð>€LàÑØ#§n¡n°Q¸¸A¸qÓ&AÑAˆä—=‘=×(Ñ(¨È6Ï?É?Ð(Ð[€LÜ—,’,˜|Ó3€KØ×'Ñ'¨¨1Ó-×8Ñ8Ó:€KàÐ$Ð$ó    c                   óZ  ^ • \ rS rSrSr     SS\S\S\S\S\S\S	\\	   4U 4S
 jjjr
    SS\R                  S\\R                     S\\R                     S\\R                     S\\   S\\   S\\R                  \\R                     \\\R                        4   4S jjrSrU =r$ )ÚPatchTSTAttentionéE   z=Multi-headed attention from 'Attention Is All You Need' paperÚ	embed_dimÚ	num_headsr   Ú
is_decoderÚbiasÚ	is_causalÚconfigc                 óæ  >• [         TU ]  5         Xl        X l        X0l        X-  U l        Xpl        U R
                  U-  U R                  :w  a  [        SU R                   SU S35      eU R
                  S-  U l        X@l	        X`l
        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: ú).r!   ©r;   )ÚsuperÚ__init__r8   r9   r   Úhead_dimr=   Ú
ValueErrorr   r:   r<   r   ÚLinearÚk_projÚv_projÚq_projÚout_proj)	Úselfr8   r9   r   r:   r;   r<   r=   Ú	__class__s	           €r2   rB   ÚPatchTSTAttention.__init__H   sÎ   ø€ ô 	‰ÑÔØ"ŒØ"ŒØŒØ!Ñ.ˆŒØŒàM‰M˜IÑ%¨$¯.©.Ó8ÜØMÈdÏnÉnÐM]Ø$ Y K¨rð3óð ð —}‘} dÑ*ˆŒØ$ŒØ"Œä—i’i 	¸4Ñ@ˆŒÜ—i’i 	¸4Ñ@ˆŒÜ—i’i 	¸4Ñ@ˆŒÜŸ	š	 )¸TÑBˆr4   Úhidden_statesÚkey_value_statesr   Úlayer_head_maskÚoutput_attentionsr/   Úreturnc                 ó   • USLnUR                   SS u  p‰U(       a  UR                   S   OU	n
X‰SU R                  4nXŠSU R                  4nU R                  U5      R                  " U6 R	                  SS5      nU(       a  UOUnU R                  U5      R                  " U6 R	                  SS5      nU R                  U5      R                  " U6 R	                  SS5      n[        nU R                  R                  S:w  a  [        U R                  R                     nU" U UUUU4U R                  (       d  SOU R                  U R                  UUS.UD6u  nnUR                  X‰S5      R                  5       nU R!                  U5      nUUS4$ )z#Input shape: Batch x Time x ChannelNr    r   r"   Úeagerç        )r   r   rP   r   )ÚshaperC   rH   r-   r*   rF   rG   r3   r=   Ú_attn_implementationr   r&   r   r   Úreshaper.   rI   )rJ   rM   rN   r   rO   rP   r/   Úis_cross_attentionÚbszÚtgt_lenÚsrc_lenÚq_input_shapeÚkv_input_shapeÚquery_statesÚcurrent_statesÚ
key_statesÚvalue_statesÚattention_interfacer1   r0   s                       r2   ÚforwardÚPatchTSTAttention.forwardg   sŽ  € ð .°TÐ9Ðð %×*Ñ*¨3¨BÐ/‰ˆÞ/AÐ"×(Ñ(¨Ò+Àwˆà r¨4¯=©=Ð9ˆØ¨¨D¯M©MÐ:ˆð —{‘{ =Ó1×6Ò6¸ÐF×PÑPÐQRÐTUÓVˆæ-?Ñ)À]ˆØ—[‘[ Ó0×5Ò5°~ÐF×PÑPÐQRÐTUÓVˆ
Ø—{‘{ >Ó2×7Ò7¸ÐH×RÑRÐSTÐVWÓXˆä(?ÐØ;‰;×+Ñ+¨wÓ6Ü"9¸$¿+¹+×:ZÑ:ZÑ"[Ðá$7ØØØØØð%
ð  $Ÿ}Ÿ}‘C°$·,±,Ø—L‘LØ/Ø%ñ%
ð ñ%
Ñ!ˆ\ð "×)Ñ)¨#¸Ó;×FÑFÓHˆØ—m‘m KÓ0ˆà˜L¨$Ð.Ð.r4   )r=   r   r8   rC   r<   r:   rF   r9   rI   rH   r   rG   )rT   FTFN)NNNF)Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__ÚintÚfloatÚboolr   r   rB   r(   ÚTensorr   r
   Útuplerc   Ú__static_attributes__Ú__classcell__©rK   s   @r2   r6   r6   E   s   ø† ÙGð Ø ØØØ+/ñCàðCð ðCð ð	Cð
 ðCð ðCð ðCð ˜Ñ(÷Cð CðD 48Ø15Ø26Ø,1ñ3/à—|‘|ð3/ð # 5§<¡<Ñ0ð3/ð ! §¡Ñ.ð	3/ð
 " %§,¡,Ñ/ð3/ð $ D™>ð3/ð Ð-Ñ.ð3/ð 
ˆu|‰|˜X e§l¡lÑ3°X¸eÀEÇLÁLÑ>QÑ5RÐRÑ	S÷3/ó 3/r4   r6   c                   óV   ^ • \ rS rSrSrS\4U 4S jjrS\R                  4S jr	Sr
U =r$ )ÚPatchTSTBatchNormé   zH
Compute batch normalization over the sequence length (time) dimension.
r=   c                 ó~   >• [         TU ]  5         [        R                  " UR                  UR
                  S9U l        g )N©Úeps)rA   rB   r   ÚBatchNorm1dÚd_modelÚnorm_epsÚ	batchnorm©rJ   r=   rK   s     €r2   rB   ÚPatchTSTBatchNorm.__init__¢   s(   ø€ Ü‰ÑÔÜŸš¨¯©¸F¿O¹OÑLˆr4   Úinputsc                 ól   • UR                  SS5      nU R                  U5      nUR                  SS5      $ )zÓ
Parameters:
    inputs (`torch.Tensor` of shape `(batch_size, sequence_length, d_model)`):
        input for Batch norm calculation
Returns:
    `torch.Tensor` of shape `(batch_size, sequence_length, d_model)`
r   r"   )r*   r{   )rJ   r~   Úoutputs      r2   rc   ÚPatchTSTBatchNorm.forward¦   s7   € ð ×!Ñ! ! QÓ'ˆØ—‘ Ó'ˆØ×Ñ  1Ó%Ð%r4   )r{   ©re   rf   rg   rh   ri   r   rB   r(   rm   rc   ro   rp   rq   s   @r2   rs   rs      s+   ø† ñðM˜~÷ Mð
&˜eŸl™l÷ 
&ò 
&r4   rs   r~   Ú
mask_ratioÚunmasked_channel_indicesÚchannel_consistent_maskingÚ
mask_valuec                 ó˜  • US:  d  US:¼  a  [        SU S35      eU R                  u  pVpxU R                  n	[        USU-
  -  5      n
U(       a*  [        R
                  " USXyS9nUR                  SUS5      nO[        R
                  " XVXyS9n[        R                  " XVXyS9nSUSS2SS2SU
24'   [        R                  " USS9n[        R                  " USS9n[        R                  " USUS	9nUR                  S5      R                  SSSU5      nUb  SUSS2USS2SS24'   U R                  UR                  5       U5      nXüS
   4$ )a†  random_masking: Mask the input considering the control variables.

Args:
    inputs (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, num_features)`):
        The input tensor to mask.
    mask_ratio (`float`):
        Masking ratio applied to mask the input data during random pretraining. It is the number between 0 and 1.
    unmasked_channel_indices (list, *optional*):
        Indices of channels that will not be masked.
    channel_consistent_masking (bool, *optional*, defaults to `False`):
        When true, masking will be same across all channels of a timeseries. Otherwise, masking positions will vary
        across channels.
    mask_value (int, *optional*, defaults to 0):
        Define the value of masked patches for pretraining.

Returns:
    `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as input Tensor and mask tensor of shape [bs x c x
    n]
r   r   zMask ratio z has to be between 0 and 1.©ÚdeviceNr    r#   )r$   Úindex©.r   )rD   rU   r‰   rj   r(   ÚrandÚrepeatÚonesÚargsortÚgatherÚ	unsqueezeÚmasked_fillrl   )r~   rƒ   r„   r…   r†   Ú
batch_sizeÚnum_channelsÚsequence_lengthÚnum_featuresr‰   Úlen_keepÚnoiseÚmaskÚids_shuffleÚids_restoreÚinputs_masks                   r2   Úrandom_maskingr   ³   sA  € ð4 Aƒ~˜ q›Ü˜; z lÐ2MÐNÓOÐOà>D¿l¹lÑ;€J˜oØ]‰]€Fä? a¨*¡nÑ5Ó6€Hæ!Ü—
’
˜: q¨/ÑIˆØ—‘˜Q ¨aÓ0‰ô —
’
˜:°_ÑTˆô :Š:j°ÑO€DØ€DŠŠAˆyˆyˆÑô —-’- ¨2Ñ.€KÜ—-’- °Ñ4€Kä<Š<˜ "¨KÑ8€DØ>‰>˜"Ó×$Ñ$ Q¨¨1¨lÓ;€DØÑ+Ø23ˆŠQÐ(ª!ªQÐ.Ñ/à×$Ñ$ T§Y¡Y£[°*Ó=€KØ˜V™Ð$Ð$r4   Únum_forecast_mask_patchesc                 óN  • [        U[        5      (       a  U/nU Vs/ sH  nSPM     nnU R                  u  pgp‰[        R                  " XgX€R
                  S9n
/ nSn[        U5      n[        X5       HG  u  pïUS::  d  Xè:¼  a  [        SU S35      e[        Xo-  U-  5      nUR                  XïU/5        UU-  nMI     [        US S9nXÆ:  a  US   S   Xl-
  -   US   S'   OXÆ:”  a  US	   S   XÆ-
  -   US	   S'   SnU H  u  nnnUU-   nSU
UU2S
S
2U* S
24'   UnM     [        R                  " U
R                  S   5      nU
U   n
U
R                  S	5      R                  SSSU	5      n
Ub  SU
S
S
2US
S
2S
S
24'   U R                  U
R                  5       U5      nUU
S   4$ s  snf )ai  Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches.
If num_forecast_mask_patches is a list, samples in the batch will be randomly masked by numbers defined in the list.

Parameters:
    inputs (`torch.Tensor`):
        Input of shape `(bs, num_channels, num_patch, patch_length)`
    num_forecast_mask_patches (`list`):
        Number of patches to be masked at the end of each batch sample. e.g. 4 or [3, 5].
    unmasked_channel_indices (`list`, *optional*):
        Indices of channels that are not masked.
    mask_value (`int`, *optional*, defaults to 0):
        Values in the masked patches will be filled by `mask_value`.

Returns:
    `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as inputs Tensor and Mask tensor of shape `(bs,
    num_channels , num_patch)` or `(bs, tsg1, tsg2, num_channels, num_patch)`
r   rˆ   r   znum_forecast_mask_patches z6 should be greater than 0 and less than total patches.c                 ó   • U S   $ )Nr"   © )Úxs    r2   Ú<lambda>Ú"forecast_masking.<locals>.<lambda>  s   € ¨!¨Aª$r4   )r   r"   r    Nr‹   )Ú
isinstancerj   rU   r(   Úzerosr‰   ÚsumÚziprD   ÚappendÚsortedÚrandpermr‘   r   r’   rl   )r~   rž   r„   r†   Ú_Úforecast_mask_ratiosr“   r”   r•   r–   r™   Út_listÚtotal_lengthÚtotal_ratioÚpatch_lengthÚratioÚtemp_lenÚbatch1Ú	patch_lenÚbatch2Úpermrœ   s                         r2   Úforecast_maskingr¸   í   sí  € ô0 Ð+¬S×1Ñ1Ø%>Ð$?Ð!Ù'@ÓAÑ'@ !›AÑ'@ÐÐAà>D¿l¹lÑ;€J˜oÜ;Š;z°ÏÉÑW€Dà€FØ€LÜÐ*Ó+€Kä"Ð#<ÖSÑˆØ˜1Ó Ó ?ÜØ,¨\¨NÐ:pÐqóð ô zÑ)¨KÑ7Ó8ˆØ‰|¨HÐ5Ô6Ø˜Ñ Šñ  Tô F¡Ñ/€FàÓ Ø˜a‘y ‘| zÑ'@ÑAˆˆq‰	!ŠØ	Ó	"Ø˜r™
 1™¨Ñ)BÑCˆˆr‰
1‰à€FÛ"(Ñˆ	1hØ˜(Ñ"ˆØ./ˆˆVFˆ]šA 	˜z™{Ð*Ñ+ØŠñ #)ô
 >Š>˜$Ÿ*™* Q™-Ó(€DØ‰:€Dà>‰>˜"Ó×$Ñ$ Q¨¨1¨lÓ;€DØÑ+Ø23ˆŠQÐ(ª!ªQÐ.Ñ/à×$Ñ$ T§Y¡Y£[°*Ó=€KØ˜˜V™Ð$Ð$ùòO Bs   F"c                   óV   ^ • \ rS rSrSrS\4U 4S jjrS\R                  4S jr	Sr
U =r$ )ÚPatchTSTPatchifyi1  z£
A class to patchify the time series sequence into different patches

Returns:
    `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
r=   c                 ó  >• [         TU ]  5         UR                  U l        UR                  U l        UR
                  U l        U R                  U R                  ::  a&  [        SU R                   SU R                   S35      e[        U R                  U R                  5      U R                  -
  U R
                  -  S-   U l        U R                  U R
                  U R                  S-
  -  -   nU R                  U-
  U l	        g )NzSequence length (z+) has to be greater than the patch length (Ú)r   )
rA   rB   Úcontext_lengthr•   r±   Úpatch_striderD   ÚmaxÚnum_patchesÚsequence_start)rJ   r=   Únew_sequence_lengthrK   s      €r2   rB   ÚPatchTSTPatchify.__init__9  sò   ø€ Ü‰ÑÔà%×4Ñ4ˆÔØ"×/Ñ/ˆÔØ"×/Ñ/ˆÔà×Ñ 4×#4Ñ#4Ó4ÜØ# D×$8Ñ$8Ð#9Ð9dÐei×evÑevÐdwÐwxÐyóð ô
   × 4Ñ 4°d×6GÑ6GÓHÈ4×K\ÑK\Ñ\Ðae×arÑarÑrÐuvÑvˆÔØ"×/Ñ/°$×2CÑ2CÀt×GWÑGWÐZ[ÑG[Ñ2\Ñ\ÐØ"×2Ñ2Ð5HÑHˆÕr4   Úpast_valuesc                 ó4  • UR                   S   nX R                  :w  a  [        SU SU R                   S35      eUSS2U R                  S2SS24   nUR	                  SU R
                  U R                  S9nUR                  SS5      R                  5       nU$ )zñ
Parameters:
    past_values (`torch.Tensor` of shape `(batch_size, sequence_length, num_channels)`, *required*):
        Input for patchification

Returns:
    `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
éþÿÿÿzInput sequence length (z%) doesn't match model configuration (r?   N)Ú	dimensionr'   Ústepéýÿÿÿ)	rU   r•   rD   rÁ   Úunfoldr±   r¾   r*   r.   )rJ   rÄ   r•   r€   s       r2   rc   ÚPatchTSTPatchify.forwardJ  s¦   € ð &×+Ñ+¨BÑ/ˆØ×2Ñ2Ó2ÜØ)¨/Ð):Ð:_Ð`d×`tÑ`tÐ_uÐuwÐxóð ð šQ × 3Ñ 3Ñ 5²qÐ8Ñ9ˆà—‘¨°$×2CÑ2CÈ$×J[ÑJ[Ð\ˆà×!Ñ! " bÓ)×4Ñ4Ó6ˆØˆr4   )rÀ   r±   r¾   r•   rÁ   r‚   rq   s   @r2   rº   rº   1  s+   ø† ñðI˜~÷ Ið" 5§<¡<÷ ò r4   rº   c                   óV   ^ • \ rS rSrSrS\4U 4S jjrS\R                  4S jr	Sr
U =r$ )ÚPatchTSTMaskingia  al  
Class to perform random or forecast masking.

Parameters:
    config (`PatchTSTConfig`): model config
Returns:
    x_mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
        Masked patched input
    mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
        Bool tensor indicating True on masked points
r=   c                 ó>  >• [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l        U R                  b  [        U R                  5      U l        g g ©N)	rA   rB   Úrandom_mask_ratior…   Ú	mask_typerž   r„   r†   rª   r|   s     €r2   rB   ÚPatchTSTMasking.__init__n  s„   ø€ Ü‰ÑÔØ!'×!9Ñ!9ˆÔØ*0×*KÑ*KˆÔ'Ø×)Ñ)ˆŒØ)/×)IÑ)IˆÔ&Ø(.×(GÑ(GˆÔ%Ø ×+Ñ+ˆŒØ×(Ñ(Ñ4Ü,2°4×3PÑ3PÓ,QˆDÕ)ð 5r4   Úpatch_inputc                 ód  • U R                   S:X  a8  [        UU R                  U R                  U R                  U R
                  S9u  p#OVU R                   S:X  a-  [        UU R                  U R                  U R
                  S9u  p#O[        SU R                    S35      eUR                  5       nX#4$ )aœ  
Parameters:
    patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
        Patch input

Return:
    masked_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
        Masked patched input
    mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
        Bool tensor indicating True on masked points

Úrandom)r~   rƒ   r„   r…   r†   Úforecast)r~   rž   r„   r†   zInvalid mask type Ú.)
rÑ   r   rÐ   r„   r…   r†   r¸   rž   rD   rl   )rJ   rÓ   Úmasked_inputr™   s       r2   rc   ÚPatchTSTMasking.forwardy  s¯   € ð >‰>˜XÓ%Ü!/Ø"Ø×1Ñ1Ø)-×)FÑ)FØ+/×+JÑ+JØŸ?™?ñ"ÑˆL˜$ð ^‰^˜zÓ)Ü!1Ø"Ø*.×*HÑ*HØ)-×)FÑ)FØŸ?™?ñ	"ÑˆL˜$ô Ð1°$·.±.Ð1AÀÐCÓDÐDð y‰y‹{ˆØÐ!Ð!r4   )r…   rÑ   r†   rž   rÐ   r„   r‚   rq   s   @r2   rÍ   rÍ   a  s+   ø† ñ
ð	R˜~÷ 	Rð!" 5§<¡<÷ !"ò !"r4   rÍ   c                   ód   ^ • \ rS rSrSrS\4U 4S jjrS	S\R                  S\	\
   4S jjrSrU =r$ )
ÚPatchTSTEncoderLayeri  z
PatchTST encoder layer
r=   c                 ó(  >• [         TU ]  5         UR                  U l        [        UR                  UR
                  UR                  US9U l        UR                  S:”  a   [        R                  " UR                  5      O[        R                  " 5       U l        UR                  S:X  a  [        U5      U l        OWUR                  S:X  a/  [        R                   " UR                  UR"                  S9U l        O[%        UR                   S35      eU R                  (       aÂ  UR                  S:”  a   [        R                  " UR                  5      O[        R                  " 5       U l        UR                  S:X  a  [        U5      U l        OWUR                  S:X  a/  [        R                   " UR                  UR"                  S9U l        O[%        UR                   S35      e[        R*                  " [        R,                  " UR                  UR.                  UR0                  S9[2        UR4                     " 5       UR6                  S:”  a   [        R                  " UR6                  5      O[        R                  " 5       [        R,                  " UR.                  UR                  UR0                  S95      U l        UR                  S:”  a   [        R                  " UR                  5      O[        R                  " 5       U l        UR                  S:X  a  [        U5      U l        OWUR                  S:X  a/  [        R                   " UR                  UR"                  S9U l        O[%        UR                   S35      eUR>                  U l        g )N)r8   r9   r   r=   r   r{   Ú	layernormrv   z$ is not a supported norm layer type.r@   ) rA   rB   Úchannel_attentionr6   ry   Únum_attention_headsÚattention_dropoutÚ	self_attnÚpath_dropoutr   ÚDropoutÚIdentityÚdropout_path1Ú	norm_typers   Únorm_sublayer1Ú	LayerNormrz   rD   Údropout_path2Únorm_sublayer2Ú
SequentialrE   Úffn_dimr;   r	   Úactivation_functionÚ
ff_dropoutÚffÚdropout_path3Únorm_sublayer3Úpre_normr|   s     €r2   rB   ÚPatchTSTEncoderLayer.__init__¢  s‘  ø€ Ü‰ÑÔà!'×!9Ñ!9ˆÔä*Ø—n‘nØ×0Ñ0Ø×,Ñ,Øñ	
ˆŒð AG×@SÑ@SÐVWÓ@WœRŸZšZ¨×(;Ñ(;Ô<Ô]_×]hÒ]hÓ]jˆÔØ×Ñ˜{Ó*Ü"3°FÓ";ˆDÕØ×Ñ Ó,Ü"$§,¢,¨v¯~©~À6Ç?Á?Ñ"SˆDÕä × 0Ñ 0Ð1Ð1UÐVÓWÐWð ×!×!ØDJ×DWÑDWÐZ[ÓD[¤§¢¨F×,?Ñ,?Ô!@Ôac×alÒalÓanˆDÔØ×Ñ ;Ó.Ü&7¸Ó&?Õ#Ø×!Ñ! [Ó0Ü&(§l¢l°6·>±>ÀvÇÁÑ&WÕ#ä  F×$4Ñ$4Ð#5Ð5YÐ!ZÓ[Ð[ô —-’-ÜIŠIf—n‘n f§n¡n¸6¿;¹;ÑGÜF×.Ñ.Ò/Ó1Ø-3×->Ñ->ÀÓ-BŒBJŠJv×(Ñ(Ô)ÌÏÊËÜIŠIf—n‘n f§n¡n¸6¿;¹;ÑGó	
ˆŒð AG×@SÑ@SÐVWÓ@WœRŸZšZ¨×(;Ñ(;Ô<Ô]_×]hÒ]hÓ]jˆÔØ×Ñ˜{Ó*Ü"3°FÓ";ˆDÕØ×Ñ Ó,Ü"$§,¢,¨v¯~©~À6Ç?Á?Ñ"SˆDÕä × 0Ñ 0Ð1Ð1UÐVÓWÐWàŸ™ˆr4   Úhidden_staterP   c                 óÆ  • UR                   u  p4pVUR                  X4-  XV5      nU R                  (       a6  U R                  U R	                  U5      US9u  pxn	XR                  U5      -   nO4U R                  XS9u  pxn	U R	                  XR                  U5      -   5      nUR                  X4XV5      nU R                  (       aá  UR                  SS5      R                  5       nUR                  X5-  XF5      nU R                  (       a6  U R                  U R                  U5      US9u  pzn	XR                  U5      -   nO4U R                  XS9u  pzn	U R                  XR                  U5      -   5      nUR                  X5XF5      nUR                  SS5      R                  5       nUR                  X4-  XV5      nU R                  (       a2  XR                  U R                  U R                  U5      5      5      -   nO1U R                  XR                  U R                  U5      5      -   5      nUR                  X4XV5      nU4nU(       a  X°R                  (       a  UW
4OU4-  nU$ )ao  
Parameters:
    hidden_state (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, d_model)`, *required*):
        Past values of the time series
    output_attentions (`bool`, *optional*):
        Whether or not to return the output attention of all layers
Return:
    `torch.Tensor` of shape `(batch_size, num_channels, sequence_length, d_model)`

)rM   rP   r"   r   )rU   r-   rò   rá   rç   rå   rW   rÞ   r*   r.   rê   ré   rð   rï   rñ   )rJ   rô   rP   r“   Únum_input_channelsr•   ry   r1   r0   r¬   Úchannel_attn_weightsÚoutputss               r2   rc   ÚPatchTSTEncoderLayer.forwardÔ  si  € ð DP×CUÑCUÑ@ˆ
¨ð $×(Ñ(¨Ñ)HÈ/Ócˆà==à+/¯>©>Ø"×1Ñ1°,Ó?ÐSdð ,:ð ,Ñ(ˆK qð (×*<Ñ*<¸[Ó*IÑI‰Lð ,0¯>©>Ø*ð ,:ð ,Ñ(ˆK qð  ×.Ñ.¨|×>PÑ>PÐQ\Ó>]Ñ/]Ó^ˆLð $×+Ñ+¨JÈOÓeˆð ×!×!à'×1Ñ1°!°QÓ7×BÑBÓDˆLà'×,Ñ,¨ZÑ-IÐK]ÓgˆLØ}}à7;·~±~Ø"&×"5Ñ"5°lÓ"CÐWhð 8Fð 8Ñ4°1ð  ,×.@Ñ.@ÀÓ.MÑM‘ð 8<·~±~Ø".ð 8Fð 8Ñ4°1ð  $×2Ñ2°<×BTÑBTÐU`ÓBaÑ3aÓbð (×/Ñ/°
ÐM_ÓiˆLà'×1Ñ1°!°QÓ7×BÑBÓDˆLð $×(Ñ(¨Ñ)HÈ/ÓcˆØ==ð (×*<Ñ*<¸T¿W¹WÀT×EXÑEXÐYeÓEfÓ=gÓ*hÑh‰Lð  ×.Ñ.¨|×>PÑ>PÐQU×QXÑQXÐYeÓQfÓ>gÑ/gÓhˆLð $×+Ñ+¨JÈOÓeˆà/ˆÞØ×?U×?U˜Ð&:Ñ;Ð\hÐ[jÑjˆGàˆr4   )
rÞ   rå   ré   rð   rï   rç   rê   rñ   rò   rá   rÏ   )re   rf   rg   rh   ri   r   rB   r(   rm   r   rl   rc   ro   rp   rq   s   @r2   rÛ   rÛ     s9   ø† ñð0(˜~÷ 0(ñdQ E§L¡Lð QÀXÈdÁ^÷ Qó Qr4   rÛ   c                   óX   • \ rS rSr% \\S'   SrSrSrS\	R                  4S jrSS jrS	rg
)ÚPatchTSTPreTrainedModeli(  r=   ÚmodelrÄ   Fr   c                 ó˜  • [        U[        5      (       aÏ  [        U R                  R                  U R                  R
                  5      U R                  R
                  -
  U R                  R                  -  S-   nU R                  R                  (       a-  [        R                  R                  UR                  SS9  US-  nUR                  U R                  U5      Ul        g[        U[        R                  5      (       aJ  UR                  R                   R#                  5         UR$                  R                   R'                  S5        g[        U[(        5      (       a^  UR*                  R                  R                   R#                  5         UR*                  R$                  R                   R'                  S5        g[        U[        R,                  5      (       ak  UR$                  R                   R                  SU R                  R.                  S9  UR                  b%  UR                  R                   R#                  5         ggg)z
Initialize weights
r   g{®Gáz”?)Ústdç      ð?rT   )Úmeanrþ   N)r¥   ÚPatchTSTPositionalEncodingr¿   r=   r½   r±   r¾   Úuse_cls_tokenr   ÚinitÚnormal_Ú	cls_tokenÚ_init_peÚposition_encrè   r;   ÚdataÚzero_ÚweightÚfill_rs   r{   rE   Úinit_std)rJ   r   rÀ   s      r2   Ú_init_weightsÚ%PatchTSTPreTrainedModel._init_weights/  s”  € ô fÔ8×9Ñ9ô D—K‘K×.Ñ.°·±×0HÑ0HÓIÈDÏKÉK×LdÑLdÑdØ—‘×)Ñ)ñ*à,-ñ.ˆKð {‰{×(×(Ü—‘—‘ × 0Ñ 0°dÑ;Ø˜qÑ à"(§/¡/°$·+±+¸{Ó"KˆFÕÜ˜¤§¡×-Ñ-ØK‰K×Ñ×"Ñ"Ô$ØM‰M×Ñ×$Ñ$ SÕ)Ü˜Ô 1×2Ñ2Ø×Ñ×!Ñ!×&Ñ&×,Ñ,Ô.Ø×Ñ×#Ñ#×(Ñ(×.Ñ.¨sÕ3Ü˜¤§	¡	×*Ñ*ØM‰M×Ñ×&Ñ&¨C°T·[±[×5IÑ5IÐ&ÑJØ{‰{Ñ&Ø—‘× Ñ ×&Ñ&Õ(ð 'ð +r4   c                 ó<   • [        U[        5      (       a  X!l        g g rÏ   )r¥   ÚPatchTSTEncoderÚgradient_checkpointing)rJ   r   r   s      r2   Ú_set_gradient_checkpointingÚ3PatchTSTPreTrainedModel._set_gradient_checkpointingI  s   € Üfœ×0Ñ0Ø,1Õ)ð 1r4   r¡   N)F)re   rf   rg   rh   r   Ú__annotations__Úbase_model_prefixÚmain_input_nameÚsupports_gradient_checkpointingr   ÚModuler  r  ro   r¡   r4   r2   rû   rû   (  s.   ‡ àÓØÐØ#€OØ&+Ð#ð) B§I¡Iô )÷42r4   rû   c                   óR   ^ • \ rS rSrS\4U 4S jjrS\R                  4S jrSr	U =r
$ )ÚPatchTSTEmbeddingiN  r=   c                 óÞ  >• [         TU ]  5         UR                  U l        UR                  U l        U R                  (       a1  [        R
                  " UR                  UR                  5      U l        g [        R                  " 5       U l        [        UR                  5       HG  nU R                  R                  [        R
                  " UR                  UR                  5      5        MI     g rÏ   )rA   rB   rö   Úshare_embeddingr   rE   r±   ry   Úinput_embeddingÚ
ModuleListÚranger©   )rJ   r=   r¬   rK   s      €r2   rB   ÚPatchTSTEmbedding.__init__O  sœ   ø€ Ü‰ÑÔØ"(×";Ñ";ˆÔØ%×5Ñ5ˆÔà××Ü#%§9¢9¨V×-@Ñ-@À&Ç.Á.Ó#QˆDÕ ä#%§=¢=£?ˆDÔ Ü˜6×4Ñ4Ö5Ø×$Ñ$×+Ñ+¬B¯IªI°f×6IÑ6IÈ6Ï>É>Ó,ZÖ[ò 6r4   rÓ   c                 óh  • UR                   S   nX R                  :w  a  [        SU R                   SU S35      eU R                  (       a  U R	                  U5      nU$ [        U5       Vs/ sH$  o@R                  U   " USS2USS2SS24   5      PM&     nn[        R                  " USS9nU$ s  snf )zõ
Parameters:
    patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
        Patch input for embedding
return:
    `torch.Tensor` of shape `(batch_size, num_channels, num_patches, d_model)`
r   z&The defined number of input channels (zQ) in the config has to be the same as the number of channels in the batch input (r¼   Nr#   )rU   rö   rD   r  r  r  r(   Ústack)rJ   rÓ   rö   Ú
embeddingsÚis        r2   rc   ÚPatchTSTEmbedding.forward[  sÌ   € ð )×.Ñ.¨qÑ1ÐØ×!8Ñ!8Ó8ÜØ8¸×9PÑ9PÐ8Qð RTØTfÐSgÐghðjóð ð ××Ø×-Ñ-¨kÓ:ˆJð Ðô UZÐZlÔTmÓnÑTmÈq×.Ñ.¨qÒ1°+ºaÀÂAÂq¸jÑ2IÖJÑTmˆJÐnÜŸš Z°QÑ7ˆJØÐùò os   Á,*B/)r  rö   r  ©re   rf   rg   rh   r   rB   r(   rm   rc   ro   rp   rq   s   @r2   r  r  N  s&   ø† ð
\˜~÷ 
\ð 5§<¡<÷ ò r4   r  c                   óŽ   ^ • \ rS rSrSrS\S\4U 4S jjr\S\S\S\	R                  4S j5       rS\R                  4S	 jrS
rU =r$ )r  ir  z
Class for positional encoding
r=   rÀ   c                 óÖ  >• [         TU ]  5         UR                  U l        UR                  U l        UR                  (       aA  [        R
                  " [        R                  " SSSUR                  5      5      U l	        US-  nU R                  X5      U l        UR                  S:”  a&  [        R                  " UR                  5      U l        g [        R                  " 5       U l        g )Nr   r   )rA   rB   r  rö   r   Ú	Parameterr(   r¦   ry   r  r  r  Úpositional_dropoutrã   rä   ©rJ   r=   rÀ   rK   s      €r2   rB   Ú#PatchTSTPositionalEncoding.__init__w  s¯   ø€ Ü‰ÑÔØ#×1Ñ1ˆÔØ"(×";Ñ";ˆÔØ××äŸ\š\¬%¯+ª+°a¸¸A¸v¿~¹~Ó*NÓOˆDŒNØ˜1ÑˆKà ŸM™M¨&Ó>ˆÔð 6<×5NÑ5NÐQRÓ5RŒBJŠJv×0Ñ0Ó1ð 	ÕÜXZ×XcÒXcÓXeð 	Õr4   rQ   c                 ó$  • U R                   S:X  a5  [        R                  " [        R                  " XR
                  5      SS9nU$ U R                   S:X  Ga#  [        R                  " XR
                  5      n[        R                  " SU5      R                  S5      n[        R                  " [        R                  " SU R
                  S5      [        R                  " S5      U R
                  -  * -  5      n[        R                  " X4-  5      US S 2SS S24'   [        R                  " X4-  5      US S 2SS S24'   X"R                  5       -
  nX"R                  5       S	-  -  n[        R                  " US
S9nU$ [!        U R                    S35      e)NrÕ   T©Úrequires_gradÚsincosr   r   r"   g     ˆÃ@é
   FzN is not a valid positional encoder. Available types are 'random' and 'sincos'.)Úpositional_encoding_typer   r)  r(   Úrandnry   r¦   Úaranger‘   ÚexpÚmathÚlogÚsinÚcosr   rþ   rD   )r=   rÀ   r  ÚpositionÚdiv_terms        r2   r  Ú#PatchTSTPositionalEncoding._init_pe†  sX  € ð ×*Ñ*¨hÓ6ÜŸ<š<¬¯ª°KÇÁÓ(PÐ`dÑeˆLð Ðð ×,Ñ,°Ô8Ü Ÿ;š; {·N±NÓCˆLÜ—|’| A {Ó3×=Ñ=¸aÓ@ˆHÜ—y’y¤§¢¨a°·±ÀÓ!CÌÏÊÐQXÓHYÐ\b×\jÑ\jÑHjÐFkÑ!kÓlˆHÜ$)§I¢I¨hÑ.AÓ$BˆLš˜A˜D˜q˜D˜Ñ!Ü$)§I¢I¨hÑ.AÓ$BˆLš˜A˜D˜q˜D˜Ñ!Ø'×*;Ñ*;Ó*=Ñ=ˆLØ'×+;Ñ+;Ó+=ÀÑ+BÑCˆLÜŸ<š<¨ÀEÑJˆLð
 Ðô Ø×2Ñ2Ð3ð  4Bð  Cóð r4   rÓ   c                 óx  • U R                   (       a‹  U R                  XR                  SS 2S S 24   -   5      nU R                  U R                  S S2S S 24   -   nUR	                  UR
                  S   U R                  SS5      n[        R                  " X14SS9nU$ U R                  XR                  -   5      nU$ )Nr   r   r    r"   r#   )	r  r*  r  r  ÚexpandrU   rö   r(   Úcat)rJ   rÓ   r  Ú
cls_tokensrô   s        r2   rc   Ú"PatchTSTPositionalEncoding.forwardš  s¹   € Ø××à×1Ñ1°+×@QÑ@QÐRSÑRTÒVWÐRWÑ@XÑ2XÓYˆKàŸ™¨×):Ñ):¸2¸A¸2ºq¸5Ñ)AÑAˆIà"×)Ñ)¨+×*;Ñ*;¸AÑ*>À×@WÑ@WÐY[Ð]_Ó`ˆJä Ÿ9š9 jÐ%>ÀAÑFˆLð Ðð  ×2Ñ2°;×ARÑARÑ3RÓSˆLØÐr4   )r  rö   r  r*  r  )re   rf   rg   rh   ri   r   rj   rB   Ústaticmethodr   r)  r  r(   rm   rc   ro   rp   rq   s   @r2   r  r  r  s]   ø† ñð
˜~ð 
¸C÷ 
ð ð˜ð °cð ¸b¿l¹ló ó ðð& 5§<¡<÷ ò r4   r  c            	       óz   ^ • \ rS rSrSrS\S\4U 4S jjr  SS\R                  S\
\   S\
\   S	\4S
 jjrSrU =r$ )r  iª  z
PatchTST Encoder
r=   rÀ   c                 ó*  >• [         TU ]  U5        SU l        [        U5      U l        [        X5      U l        [        R                  " [        UR                  5       Vs/ sH  n[        U5      PM     sn5      U l        U R                  5         g s  snf )NF)rA   rB   r  r  Úembedderr  Úpositional_encoderr   r  r  Únum_hidden_layersrÛ   ÚlayersÚ	post_init)rJ   r=   rÀ   r$  rK   s       €r2   rB   ÚPatchTSTEncoder.__init__¯  sx   ø€ Ü‰Ñ˜Ô Ø&+ˆÔ#ô *¨&Ó1ˆŒä"<¸VÓ"QˆÔä—m’mÌ5ÐQW×QiÑQiÔKjÓ$kÑKjÀaÔ%9¸&Ö%AÑKjÑ$kÓlˆŒð 	‰Õùò %ls   ÁBrÓ   Úoutput_hidden_statesrP   rQ   c                 óh  • Ub  UOU R                   R                  nUb  UOU R                   R                  nU R                  U5      nU R	                  U5      nU(       a  SOSnU(       a  SOSnU R
                   H+  nU(       a  XT4-   nU" XCS9nUS   nU(       d  M#  XhS   4-   nM-     [        XEUS9$ )ar  
Parameters:
    patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
        Past values of the time series
    output_hidden_states (bool, optional): Indicates if hidden states should be outputted.
    output_attentions (bool, optional): Indicates if attentions should be outputted.

return:
    `BaseModelOutput`
Nr¡   )rô   rP   r   r   )Úlast_hidden_staterM   Ú
attentions)r=   rP   rK  rE  rF  rH  r   )	rJ   rÓ   rK  rP   rô   Úencoder_statesÚall_attentionsÚencoder_layerÚlayer_outputss	            r2   rc   ÚPatchTSTEncoder.forward½  sÃ   € ð  2CÑ1NÑ-ÐTX×T_ÑT_×TqÑTqÐà$8Ñ$DÑ È$Ï+É+×JjÑJjð 	ð
 —m‘m KÓ0ˆà×.Ñ.¨{Ó;ˆæ3™¸ˆÞ0™°dˆØ!Ÿ[œ[ˆMÞ#Ø!/°/Ñ!Aá)°|ÑiˆMð )¨Ñ+ˆLç Ð Ø!/ÀÑ3CÐ2EÑ!E’ñ )ô °ÐhvÑwÐwr4   )rE  r  rH  rF  ©NN)re   rf   rg   rh   ri   r   rj   rB   r(   rm   r   rl   r   rc   ro   rp   rq   s   @r2   r  r  ª  se   ø† ñð˜~ð ¸C÷ ð" 04Ø,0ñ	(xà—\‘\ð(xð ' t™nð(xð $ D™>ð	(xð
 
÷(xó (xr4   r  zG
    Base class for model's outputs, with potential hidden states.
    )Úcustom_introc                   ó>  • \ rS rSr% SrSr\\R                     \	S'   Sr
\\\R                        \	S'   Sr\\\R                        \	S'   Sr\\R                     \	S'   Sr\\R                     \	S'   Sr\\R                     \	S	'   Sr\\R                     \	S
'   Srg)ÚPatchTSTModelOutputiè  a  
last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
    Sequence of hidden-states at the output of the last layer of the model.
hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
    Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
    one for the output of each layer) of shape `(batch_size, num_channels, height, width)`. Hidden-states of
    the model at the output of each layer plus the optional initial embedding outputs.
mask (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches)`, *optional*):
    Bool masked tensor indicating which patches are masked
loc (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
    Mean of the input data (batch_size, sequence_length, num_channels) over the sequence_length
scale (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
    Std of the input data (batch_size, sequence_length, num_channels) over the sequence_length
patch_input (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
    Patched input to the Transformer
NrM  rM   rN  r™   ÚlocÚscalerÓ   r¡   )re   rf   rg   rh   ri   rM  r   r(   ÚFloatTensorr  rM   rn   rN  r™   rX  rY  rÓ   ro   r¡   r4   r2   rW  rW  è  s§   ‡ ñð" 6:Ðx × 1Ñ 1Ñ2Ó9Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ó<Ø59€J˜˜u×0Ñ0Ñ1Ñ2Ó9Ø(,€Dˆ(5×$Ñ$Ñ
%Ó,Ø'+€Cˆ%×#Ñ#Ñ	$Ó+Ø)-€Eˆ8E×%Ñ%Ñ&Ó-Ø/3€K˜%×+Ñ+Ñ,Ö3r4   rW  z4
    Output type of [`PatchTSTForPretraining`].
    c                   óÆ   • \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	ÚPatchTSTForPretrainingOutputi	  a
  
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    MSE loss.
prediction_output (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
    Prediction outputs of the time series modeling heads.
NÚlossÚprediction_outputrM   rN  r¡   )re   rf   rg   rh   ri   r]  r   r(   rZ  r  r^  rM   rn   rN  ro   r¡   r4   r2   r\  r\  	  sh   ‡ ñð )-€Dˆ(5×$Ñ$Ñ
%Ó,Ø59Ðx × 1Ñ 1Ñ2Ó9Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ó<Ø59€J˜˜u×0Ñ0Ñ1Ñ2Ö9r4   r\  z3
    Output type of [`PatchTSTForRegression`].
    c                   óÆ   • \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	ÚPatchTSTForRegressionOutputi  zô
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    MSE loss.
regression_outputs (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
    Regression outputs of the time series modeling heads.
Nr]  Úregression_outputsrM   rN  r¡   )re   rf   rg   rh   ri   r]  r   r(   rZ  r  ra  rM   rn   rN  ro   r¡   r4   r2   r`  r`    sh   ‡ ñð )-€Dˆ(5×$Ñ$Ñ
%Ó,Ø6:Ð˜ ×!2Ñ!2Ñ3Ó:Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ó<Ø59€J˜˜u×0Ñ0Ñ1Ñ2Ö9r4   r`  z3
    Output type of [`PatchTSTForPrediction`].
    c                   ó  • \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Sr\\R                     \	S'   Sr\\R                     \	S	'   S
rg)ÚPatchTSTForPredictionOutputi1  aé  
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    MSE loss.
prediction_outputs (`torch.FloatTensor` of shape `(batch_size, prediction_length, -1)`):
    Prediction outputs of the time series modeling heads.
attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
    Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
    sequence_length)`.

    Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
    heads.
loc: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
    Mean of the input data (batch_size, sequence_length, num_channels) over the sequence_length
scale: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
    Std of the input data (batch_size, sequence_length, num_channels) over the sequence_length
Nr]  Úprediction_outputsrM   rN  rX  rY  r¡   )re   rf   rg   rh   ri   r]  r   r(   rZ  r  rd  rM   rn   rN  rX  rY  ro   r¡   r4   r2   rc  rc  1  s’   ‡ ñð" )-€Dˆ(5×$Ñ$Ñ
%Ó,Ø6:Ð˜ ×!2Ñ!2Ñ3Ó:Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ó<Ø59€J˜˜u×0Ñ0Ñ1Ñ2Ó9Ø'+€Cˆ%×#Ñ#Ñ	$Ó+Ø)-€Eˆ8E×%Ñ%Ñ&Ö-r4   rc  z7
    Output type of [`PatchTSTForClassification`].
    c                   óÆ   • \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	ÚPatchTSTForClassificationOutputiQ  as  
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    Total loss as the sum of the masked language modeling loss and the next sequence prediction
    (classification) loss.
prediction_logits (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
    Prediction scores of the PatchTST modeling head (scores before SoftMax).
Nr]  Úprediction_logitsrM   rN  r¡   )re   rf   rg   rh   ri   r]  r   r(   rZ  r  rg  rM   rn   rN  ro   r¡   r4   r2   rf  rf  Q  sh   ‡ ñð )-€Dˆ(5×$Ñ$Ñ
%Ó,Ø59Ðx × 1Ñ 1Ñ2Ó9Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ó<Ø59€J˜˜u×0Ñ0Ñ1Ñ2Ö9r4   rf  zƒ
    Base class for time series model's predictions outputs that contains the sampled values from the chosen
    distribution.
    c                   óB   • \ rS rSr% SrSr\\R                     \	S'   Sr
g)ÚSamplePatchTSTOutputif  z˜
sequences (`torch.FloatTensor` of shape `(batch_size, num_samples, prediction_length, num_targets)`):
    Sampled values from the chosen distribution.
NÚ	sequencesr¡   )re   rf   rg   rh   ri   rj  r   r(   rZ  r  ro   r¡   r4   r2   ri  ri  f  s   ‡ ñð
 .2€Iˆx˜×)Ñ)Ñ*Ö1r4   ri  ÚinputÚtargetrQ   c                 ó&   • U R                  U5      * $ )z[
Computes the negative log likelihood loss from input distribution with respect to target.
)Úlog_prob)rk  rl  s     r2   Únllro  w  s   € ð N‰N˜6Ó"Ð"Ð"r4   Úinput_tensorÚweightsc                 óR  • Ub–  [         R                  " US:g  X-  [         R                  " U 5      5      n[         R                  " U(       a  UR	                  US9OUR	                  5       SS9nU(       a  UR	                  US9U-  $ UR	                  5       U-  $ U R                  US9$ )a:  
Computes the weighted average of a given tensor across a given `dim`, masking values associated with weight zero,
meaning instead of `nan * 0 = nan` you will get `0 * 0 = 0`.

Args:
    input_tensor (`torch.FloatTensor`):
        Input tensor, of which the average must be computed.
    weights (`torch.FloatTensor`, *optional*):
        Weights tensor, of the same shape as `input_tensor`.
    dim (`int`, *optional*):
        The dim along which to average `input_tensor`.

Returns:
    `torch.FloatTensor`: The tensor with values averaged along the specified `dim`.
r   r#   rÿ   ©Úmin)r(   ÚwhereÚ
zeros_likeÚclampr§   r   )rp  rq  r$   Úweighted_tensorÚsum_weightss        r2   Úweighted_averagerz    s™   € ð  ÑÜŸ+š+ g°¡l°LÑ4JÌE×L\ÒL\Ð]iÓLjÓkˆÜ—k’k¾# '§+¡+°# +Ñ"6À7Ç;Á;Ã=ÐVYÑZˆÞ03×#Ñ#¨Ð#Ð,ÐR]Ñ]Ð]¸×9LÑ9LÓ9NÐR]Ñ]Ð]à× Ñ  SÐ Ð)Ð)r4   c            	       óº   ^ • \ rS rSrSrS\4U 4S jjrS\R                  S\R                  S\	\R                  \R                  \R                  4   4S jr
S	rU =r$ )
ÚPatchTSTStdScaleri˜  z±
Standardize features by calculating the mean and scaling along the first dimension, and then normalizes it by
subtracting from the mean and dividing by the standard deviation.
r=   c                 ó  >• [         TU ]  5         [        US5      (       a  UR                  OSU l        [        US5      (       a  UR
                  OSU l        [        US5      (       a  UR                  U l        g SU l        g )NÚscaling_dimr   ÚkeepdimTÚminimum_scalegñhãˆµøä>)rA   rB   Úhasattrr~  r$   r  r€  r|   s     €r2   rB   ÚPatchTSTStdScaler.__init__ž  sd   ø€ Ü‰ÑÔÜ)0°¸×)GÑ)G6×%Ò%ÈQˆŒÜ)0°¸×)CÑ)Cv—~’~ÈˆŒÜ5<¸VÀ_×5UÑ5U˜V×1Ñ1ˆÕÐ[_ˆÕr4   r  Úobserved_indicatorrQ   c                 ór  • UR                  U R                  U R                  S9nUR                  S5      nX-  R                  U R                  U R                  S9U-  nX-
  U-  S-  R                  U R                  U R                  S9U-  n[        R
                  " XPR                  -   5      nX-
  U-  XF4$ )áó  
Parameters:
    data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        input for Batch norm calculation
    observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Calculating the scale on the observed indicator.
Returns:
    tuple of `torch.Tensor` of shapes
        (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
        `(batch_size, 1, num_input_channels)`)
©r  rÿ   r"   )r§   r$   r  Ú	clamp_minr(   Úsqrtr€  )rJ   r  rƒ  ÚdenominatorrX  ÚvariancerY  s          r2   rc   ÚPatchTSTStdScaler.forward¤  s³   € ð )×,Ñ,¨T¯X©X¸t¿|¹|Ð,ÐLˆØ!×+Ñ+¨CÓ0ˆØÑ(×-Ñ-¨d¯h©hÀÇÁÐ-ÐMÐP[Ñ[ˆà‘jÐ$6Ñ6¸1Ñ<×AÑAÀ$Ç(Á(ÐTX×T`ÑT`ÐAÐaÐdoÑoˆÜ—
’
˜8×&8Ñ&8Ñ8Ó9ˆØ‘
˜eÑ# SÐ/Ð/r4   )r$   r  r€  ©re   rf   rg   rh   ri   r   rB   r(   rm   rn   rc   ro   rp   rq   s   @r2   r|  r|  ˜  sX   ø† ñð
`˜~÷ `ð0Ø—L‘Lð0Ø6;·l±lð0à	ˆu|‰|˜UŸ\™\¨5¯<©<Ð7Ñ	8÷0ò 0r4   r|  c            	       óº   ^ • \ rS rSrSrS\4U 4S jjrS\R                  S\R                  S\	\R                  \R                  \R                  4   4S jr
S	rU =r$ )
ÚPatchTSTMeanScaleri¼  z~
Computes a scaling factor as the weighted average absolute value along the first dimension, and scales the data
accordingly.
r=   c                 óN  >• [         TU ]  5         [        US5      (       a  UR                  OSU l        [        US5      (       a  UR
                  OSU l        [        US5      (       a  UR                  OSU l        [        US5      (       a  UR                  U l        g S U l        g )Nr~  r   r  Tr€  ç»½×Ùß|Û=Údefault_scale)rA   rB   r  r~  r$   r  r€  r‘  r|   s     €r2   rB   ÚPatchTSTMeanScaler.__init__Â  s   ø€ Ü‰ÑÔÜ)0°¸×)GÑ)G6×%Ò%ÈQˆŒÜ)0°¸×)CÑ)Cv—~’~ÈˆŒÜ5<¸VÀ_×5UÑ5U˜V×1Ò1Ð[`ˆÔÜ5<¸VÀ_×5UÑ5U˜V×1Ñ1ˆÕÐ[_ˆÕr4   r  rƒ  rQ   c                 ó°  • X-  R                  5       R                  U R                  SS9nUR                  U R                  SS9nU[        R                  " USS9-  nU R
                  cL  UR                  SS9n[        R                  " UR                  S5      SS9n[        R                  " Xg-  5      nO#U R
                  [        R                  " U5      -  n[        R                  " US:„  XX5      n[        R                  " XPR                  S9nX-  n	U R                  (       d  UR                  U R                  S9nU	[        R                  " U5      U4$ )r…  Tr†  r   rs  r   r#   )Úabsr§   r$   r(   rw  r‘  ÚsqueezeÚ	ones_likeru  r€  r  rv  )
rJ   r  rƒ  Úts_sumÚnum_observedrY  Ú	batch_sumÚbatch_observationsr‘  Úscaled_datas
             r2   rc   ÚPatchTSTMeanScaler.forwardÉ  s"  € ð Ñ+×0Ñ0Ó2×6Ñ6°t·x±xÈÐ6ÐNˆØ)×-Ñ-¨d¯h©hÀÐ-ÐEˆàœŸš \°qÑ9Ñ9ˆð ×ÑÑ%ØŸ
™
 q˜
Ð)ˆIÜ!&§¢¨\×-=Ñ-=¸aÓ-@ÀaÑ!HÐÜ!ŸMšM¨)Ñ*HÓI‰Mà ×.Ñ.´·²ÀÓ1GÑGˆMô —’˜L¨1Ñ,¨eÓCˆô —’˜E×'9Ñ'9Ñ:ˆØ‘lˆà||Ø—M‘M d§h¡hMÐ/ˆEàœE×,Ò,¨UÓ3°UÐ:Ð:r4   )r‘  r$   r  r€  rŒ  rq   s   @r2   rŽ  rŽ  ¼  sX   ø† ñð
`˜~÷ `ð&;Ø—L‘Lð&;Ø6;·l±lð&;à	ˆu|‰|˜UŸ\™\¨5¯<©<Ð7Ñ	8÷&;ò &;r4   rŽ  c            
       óÆ   ^ • \ rS rSrSrS\4U 4S jjr S
S\R                  S\	\R                     S\
\R                  \R                  \R                  4   4S jjrS	rU =r$ )ÚPatchTSTNOPScalerió  zt
Assigns a scaling factor equal to 1 along the first dimension, and therefore applies no scaling to the input data.
r=   c                 ó¾   >• [         TU ]  5         [        US5      (       a  UR                  OSU l        [        US5      (       a  UR
                  U l        g SU l        g )Nr~  r   r  T)rA   rB   r  r~  r$   r  r|   s     €r2   rB   ÚPatchTSTNOPScaler.__init__ø  sF   ø€ Ü‰ÑÔÜ)0°¸×)GÑ)G6×%Ò%ÈQˆŒÜ)0°¸×)CÑ)Cv—~‘~ˆÈˆr4   r  rƒ  rQ   c                 óæ   • [         R                  " USS9R                  U R                  U R                  S9n[         R
                  " USS9R                  U R                  U R                  S9nXU4$ )aP  
Parameters:
    data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        input for Batch norm calculation
Returns:
    tuple of `torch.Tensor` of shapes
        (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
        `(batch_size, 1, num_input_channels)`)
Fr.  ©r$   r  )r(   r–  r   r$   r  rv  )rJ   r  rƒ  rY  rX  s        r2   rc   ÚPatchTSTNOPScaler.forwardý  sg   € ô —’ °EÑ:×?Ñ?ÀDÇHÁHÐVZ×VbÑVbÐ?ÐcˆÜ×Ò˜t°5Ñ9×>Ñ>À4Ç8Á8ÐUY×UaÑUaÐ>ÐbˆØ˜%ÐÐr4   r¢  rÏ   )re   rf   rg   rh   ri   r   rB   r(   rm   r   rn   rc   ro   rp   rq   s   @r2   rž  rž  ó  sd   ø† ñðN˜~÷ Nð PTñ Ø—L‘Lð Ø6>¸u¿|¹|Ñ6Lð à	ˆu|‰|˜UŸ\™\¨5¯<©<Ð7Ñ	8÷ ó  r4   rž  c            	       ó¶   ^ • \ rS rSrS\4U 4S jjrS\R                  S\R                  S\\R                  \R                  \R                  4   4S jr	Sr
U =r$ )	ÚPatchTSTScaleri  r=   c                 óä   >• [         TU ]  5         UR                  S:X  d  UR                  SL a  [        U5      U l        g UR                  S:X  a  [        U5      U l        g [        U5      U l        g )Nr   Trþ   )rA   rB   r   rŽ  Úscalerr|  rž  r|   s     €r2   rB   ÚPatchTSTScaler.__init__  sU   ø€ Ü‰ÑÔØ>‰>˜VÓ# v§~¡~¸Ò'=Ü,¨VÓ4ˆDKØ^‰^˜uÓ$Ü+¨FÓ3ˆDKä+¨FÓ3ˆDKr4   r  rƒ  rQ   c                 ó2   • U R                  X5      u  pnXU4$ )aî  
Parameters:
    data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Input for scaler calculation
    observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Calculating the scale on the observed indicator.
Returns:
    tuple of `torch.Tensor` of shapes
        (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
        `(batch_size, 1, um_input_channels)`)
©r§  )rJ   r  rƒ  rX  rY  s        r2   rc   ÚPatchTSTScaler.forward  s"   € ð  Ÿ;™; tÓ@Ñˆ5Ø˜%ÐÐr4   rª  )re   rf   rg   rh   r   rB   r(   rm   rn   rc   ro   rp   rq   s   @r2   r¥  r¥    sQ   ø† ð4˜~÷ 4ð Ø—L‘Lð Ø6;·l±lð à	ˆu|‰|˜UŸ\™\¨5¯<©<Ð7Ñ	8÷ ò  r4   r¥  c                   óÈ   ^ • \ rS rSrS\4U 4S jjr     SS\R                  S\\R                     S\\R                     S\\	   S\\	   S	\\	   S
\
\\4   4S jjrSrU =r$ )ÚPatchTSTModeli*  r=   c                 óf  >• [         TU ]  U5        [        U5      U l        [	        U5      U l        UR                  U l        U R
                  R                  nU R                  (       a  [        U5      U l	        O[        R                  " 5       U l	        [        XS9U l        U R                  5         g )N)rÀ   )rA   rB   r¥  r§  rº   Ú
patchifierÚdo_mask_inputrÀ   rÍ   Úmaskingr   rä   r  ÚencoderrI  r+  s      €r2   rB   ÚPatchTSTModel.__init__,  s   ø€ Ü‰Ñ˜Ô ä$ VÓ,ˆŒÜ*¨6Ó2ˆŒØ#×1Ñ1ˆÔà—o‘o×1Ñ1ˆà××Ü*¨6Ó2ˆDLäŸ;š;›=ˆDŒLÜ& vÑGˆŒð 	‰Õr4   rÄ   Úpast_observed_maskÚfuture_valuesrK  rP   Úreturn_dictrQ   c           
      óŽ  • Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [        R
                  " U5      nU R                  X5      u  pxn	U R                  U5      n
U R                  (       a  U R                  U
5      u  p¼OU R                  U
5      SpËU R                  X´US9nU(       d<  UR                  UR                  UR                  4nXìX‰U
4-   n[        S U 5       5      $ [        UR                  UR                  UR                  UUU	U
S9$ )aï  
Parameters:
    past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
        Input sequence to the model
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
    future_values (`torch.BoolTensor` of shape `(batch_size, prediction_length, num_input_channels)`, *optional*):
        Future target values associated with the `past_values`
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers
    output_attentions (`bool`, *optional*):
        Whether or not to return the output attention of all layers
    return_dict (`bool`, *optional*):
        Whether or not to return a `ModelOutput` instead of a plain tuple.

Returns:
    `PatchTSTModelOutput` or tuple of `torch.Tensor` (if `return_dict`=False or `config.return_dict`=False)

Examples:

```python
>>> from huggingface_hub import hf_hub_download
>>> import torch
>>> from transformers import PatchTSTModel

>>> file = hf_hub_download(
...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
... )
>>> batch = torch.load(file)

>>> model = PatchTSTModel.from_pretrained("namctin/patchtst_etth1_pretrain")

>>> # during training, one provides both past and future values
>>> outputs = model(
...     past_values=batch["past_values"],
...     future_values=batch["future_values"],
... )

>>> last_hidden_state = outputs.last_hidden_state
```N)rÓ   rK  rP   c              3   ó,   #   • U H  oc  M  Uv •  M     g 7frÏ   r¡   )Ú.0Úvs     r2   Ú	<genexpr>Ú(PatchTSTModel.forward.<locals>.<genexpr>Ž  s   é € Ð=¡G˜qŸ™¢Gùs   ‚‹	)rM  rM   rN  r™   rX  rY  rÓ   )r=   Úuse_return_dictrP   rK  r(   r–  r§  r¯  r°  r±  r²  rM  rM   rN  rn   rW  )rJ   rÄ   r´  rµ  rK  rP   r¶  Úscaled_past_valuesrX  rY  Úpatched_valuesÚmasked_valuesr™   Úencoder_outputrø   s                  r2   rc   ÚPatchTSTModel.forward>  sK  € ðl &1Ñ%<‘kÀ$Ç+Á+×B]ÑB]ˆØ1BÑ1NÑ-ÐTX×T_ÑT_×TqÑTqÐà$8Ñ$DÑ È$Ï+É+×JjÑJjð 	ð Ñ%Ü!&§¢°Ó!=Ðð *.¯©°[Ó)UÑ&Ð ð Ÿ™Ð);Ó<ˆØ××Ø"&§,¡,¨~Ó">ÑˆM˜4à"&§,¡,¨~Ó">À˜4àŸ™Ø%Ðduð &ð 
ˆö Ø%×7Ñ7¸×9UÑ9UÐWe×WpÑWpÐqˆGØ s°>Ð BÑBˆGÜÑ=¡GÓ=Ó=Ð=ä"Ø,×>Ñ>Ø(×6Ñ6Ø%×0Ñ0ØØØØ&ñ
ð 	
r4   )r°  r²  r±  r¯  r§  ©NNNNN)re   rf   rg   rh   r   rB   r(   rm   r   rl   r   rn   rW  rc   ro   rp   rq   s   @r2   r­  r­  *  s¤   ø† ð˜~÷ ð* 6:Ø04Ø/3Ø,0Ø&*ñZ
à—\‘\ðZ
ð % U§\¡\Ñ2ðZ
ð   §¡Ñ-ð	Z
ð
 ' t™nðZ
ð $ D™>ðZ
ð ˜d‘^ðZ
ð 
ˆuÐ)Ð)Ñ	*÷Z
ó Z
r4   r­  c                   ón   ^ • \ rS rSrSrS\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	ÚPatchTSTMaskPretrainHeadi›  z%
Pretraining head for mask modelling
r=   c                 ó8  >• [         TU ]  5         UR                  S:”  a   [        R                  " UR                  5      O[        R
                  " 5       U l        [        R                  " UR                  UR                  5      U l
        UR                  U l        g ©Nr   )rA   rB   Úhead_dropoutr   rã   rä   r   rE   ry   r±   Úlinearr  r|   s     €r2   rB   Ú!PatchTSTMaskPretrainHead.__init__   sh   ø€ Ü‰ÑÔØ:@×:MÑ:MÐPQÓ:Q”r—z’z &×"5Ñ"5Ô6ÔWY×WbÒWbÓWdˆŒÜ—i’i §¡°×0CÑ0CÓDˆŒØ#×1Ñ1ˆÕr4   Ú	embeddingrQ   c                 óŠ   • U R                  U R                  U5      5      nU R                  (       a  USS2SS2SS2SS24   nU$ )a›  
Parameters:
    embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
            `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
        Embedding from the model
Returns:
    `torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                    `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True

Nr   )rÉ  r   r  )rJ   rË  s     r2   rc   Ú PatchTSTMaskPretrainHead.forward¦  s>   € ð —K‘K §¡¨YÓ 7Ó8ˆ	Ø××Ø!¢!¢Q¨©ªA +Ñ.ˆIØÐr4   )r   rÉ  r  r‚   rq   s   @r2   rÅ  rÅ  ›  s4   ø† ñð2˜~÷ 2ð §¡ð °%·,±,÷ ò r4   rÅ  z*
    The PatchTST for pretrain model.
    c                   ó¨   ^ • \ rS rSrS\4U 4S jjr    SS\R                  S\\R                     S\\	   S\\	   S\\	   S	\
\\4   4S
 jjrSrU =r$ )ÚPatchTSTForPretrainingi·  r=   c                 óŽ   >• [         TU ]  U5        SUl        [        US9U l        [        U5      U l        U R                  5         g )NT)r=   )rA   rB   r°  r­  rü   rÅ  ÚheadrI  r|   s     €r2   rB   ÚPatchTSTForPretraining.__init__½  s<   ø€ Ü‰Ñ˜Ô à#ˆÔÜ"¨&Ñ1ˆŒ
Ü,¨VÓ4ˆŒ	ð 	‰Õr4   rÄ   r´  rK  rP   r¶  rQ   c                 óî  • Ub  UOU R                   R                  nU R                  UUUUSS9nU R                  UR                  5      n[
        R                  " SS9nU" XvR                  5      n	U	R                  SS9UR                  -  R                  5       UR                  R                  5       S-   -  n
UR                  nU(       d  U4USS	 -   nU
b  U
4U-   nU$ UnU$ [        X§X¶R                  S
9$ )a  
Parameters:
    past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
        Input sequence to the model
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers
    output_attentions (`bool`, *optional*):
        Whether or not to return the output attention of all layers
    return_dict (`bool`, *optional*): Whether or not to return a `ModelOutput` instead of a plain tuple.

Returns:
    `PatchTSTForPretrainingOutput` or tuple of `torch.Tensor` (if `return_dict`=False or
    `config.return_dict`=False)

Examples:

```python
>>> from huggingface_hub import hf_hub_download
>>> import torch
>>> from transformers import PatchTSTConfig, PatchTSTForPretraining

>>> file = hf_hub_download(
...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
... )
>>> batch = torch.load(file)

>>> # Config for random mask pretraining
>>> config = PatchTSTConfig(
...     num_input_channels=7,
...     context_length=512,
...     patch_length=12,
...     stride=12,
...     mask_type='random',
...     random_mask_ratio=0.4,
...     use_cls_token=True,
... )
>>> # Config for forecast mask pretraining
>>> config = PatchTSTConfig(
...     num_input_channels=7,
...     context_length=512,
...     patch_length=12,
...     stride=12,
...     mask_type='forecast',
...     num_forecast_mask_patches=5,
...     use_cls_token=True,
... )
>>> model = PatchTSTForPretraining(config)

>>> # during training, one provides both past and future values
>>> outputs = model(past_values=batch["past_values"])

>>> loss = outputs.loss
>>> loss.backward()
```T©rÄ   r´  rK  rP   r¶  Únone©Ú	reductionr    r#   r  r   éüÿÿÿ)r]  r^  rM   rN  )r=   r½  rü   rÑ  rM  r   ÚMSELossrÓ   r   r™   r§   rM   r\  rN  )rJ   rÄ   r´  rK  rP   r¶  Úmodel_outputÚx_hatr]  Úloss_valÚmasked_lossrO  rø   s                r2   rc   ÚPatchTSTForPretraining.forwardÇ  s  € ðJ &1Ñ%<‘kÀ$Ç+Á+×B]ÑB]ˆð —z‘zØ#Ø1Ø!5Ø/Øð "ð 
ˆð —	‘	˜,×8Ñ8Ó9ˆô zŠz FÑ+ˆÙ˜×7Ñ7Ó8ˆØ—}‘}¨}Ð,¨|×/@Ñ/@Ñ@×EÑEÓGÈ<×K\ÑK\×K`ÑK`ÓKbÐejÑKjÑkˆà%×3Ñ3ˆÞØh ¨a°Ð!3Ñ3ˆGØ2=Ñ2I{n wÑ.ˆGØˆNð PWˆGØˆNÜ+ØÀ^×`wÑ`wñ
ð 	
r4   ©rÑ  rü   )NNNN)re   rf   rg   rh   r   rB   r(   rm   r   rl   r   rn   r\  rc   ro   rp   rq   s   @r2   rÏ  rÏ  ·  s   ø† ð˜~÷ ð 6:Ø/3Ø,0Ø&*ña
à—\‘\ða
ð % U§\¡\Ñ2ða
ð ' t™nð	a
ð
 $ D™>ða
ð ˜d‘^ða
ð 
ˆuÐ2Ð2Ñ	3÷a
ó a
r4   rÏ  c                   óR   ^ • \ rS rSrS\4U 4S jjrS\R                  4S jrSr	U =r
$ )ÚPatchTSTClassificationHeadi+  r=   c                 ó¦  >• [         TU ]  5         UR                  U l        UR                  U l        [        R
                  " SS9U l        UR                  S:”  a   [        R                  " UR                  5      O[        R                  " 5       U l
        [        R                  " UR                  UR                  -  UR                  5      U l        g ©Nr   ©Ú	start_dimr   )rA   rB   r  Úpooling_typer   ÚFlattenÚflattenrÈ  rã   rä   r   rE   rö   ry   Únum_targetsrÉ  r|   s     €r2   rB   Ú#PatchTSTClassificationHead.__init__,  s‘   ø€ Ü‰ÑÔØ#×1Ñ1ˆÔØ"×/Ñ/ˆÔÜ—z’z¨AÑ.ˆŒØ:@×:MÑ:MÐPQÓ:Q”r—z’z &×"5Ñ"5Ô6ÔWY×WbÒWbÓWdˆŒÜ—i’i × 9Ñ 9¸F¿N¹NÑ JÈF×L^ÑL^Ó_ˆr4   rË  c                 óp  • U R                   (       a  USS2SS2SSS24   nOcU R                  S:X  a  UR                  SS9nOCU R                  S:X  a  UR                  SS9R                  nO[        SU R                   S35      eU R                  U5      nU R                  U R                  U5      5      nU$ )	a#  
Parameters:
    embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
             `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
        Embedding from the model
Returns:
    `torch.Tensor` of shape `(bs, num_targets)`

Nr   r   r"   r#   r¿   úpooling operator ú is not implemented yet)	r  ræ  r   r¿   ÚvaluesrD   rè  rÉ  r   ©rJ   rË  Úpooled_embeddingr€   s       r2   rc   Ú"PatchTSTClassificationHead.forward4  s®   € ð ××à(ªªA¨q²!¨Ñ4ÑØ×Ñ &Ó(à(Ÿ~™~°!˜~Ð4ÑØ×Ñ %Ó'à(Ÿ}™}°˜}Ð3×:Ñ:ÑäÐ0°×1BÑ1BÐ0CÐCZÐ[Ó\Ð\àŸ<™<Ð(8Ó9Ðà—‘˜TŸ\™\Ð*:Ó;Ó<ˆØˆr4   )r   rè  rÉ  ræ  r  r&  rq   s   @r2   rá  rá  +  s&   ø† ð`˜~÷ `ð §¡÷ ò r4   rá  z0
    The PatchTST for classification model.
    c                   ó¾   ^ • \ rS rSrS\4U 4S jjr\     SS\R                  S\	\R                     S\	\
   S\	\
   S\	\
   S	\	\
   S
\\\4   4S jj5       rSrU =r$ )ÚPatchTSTForClassificationiP  r=   c                 óÞ   >• [         TU ]  U5        UR                  (       a  [        R	                  S5        SUl        [        U5      U l        [        U5      U l        U R                  5         g )Nú+Setting `do_mask_input` parameter to False.F)
rA   rB   r°  ÚloggerÚwarningr­  rü   rá  rÑ  rI  r|   s     €r2   rB   Ú"PatchTSTForClassification.__init__V  sT   ø€ Ü‰Ñ˜Ô ð ××ÜN‰NÐHÔIØ#(ˆFÔ ä" 6Ó*ˆŒ
Ü.¨vÓ6ˆŒ	ð 	‰Õr4   rÄ   Útarget_valuesr´  rK  rP   r¶  rQ   c                 óV  • Ub  UOU R                   R                  nU R                  UUUUSS9nU R                  UR                  5      nSn	Ub  [
        R                  " 5       n
U
" X‚5      n	U(       d  U4USS -   nU	b  U	4U-   nU$ UnU$ [        U	UUR                  UR                  S9$ )a‹  
past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
    Input sequence to the model
target_values (`torch.Tensor`, *optional*):
    Labels associates with the `past_values`
past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
    Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
    in `[0, 1]`:

    - 1 for values that are **observed**,
    - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

Examples:

```python
>>> from transformers import PatchTSTConfig, PatchTSTForClassification

>>> # classification task with two input channel2 and 3 classes
>>> config = PatchTSTConfig(
...     num_input_channels=2,
...     num_targets=3,
...     context_length=512,
...     patch_length=12,
...     stride=12,
...     use_cls_token=True,
... )
>>> model = PatchTSTForClassification(config=config)

>>> # during inference, one only provides past values
>>> past_values = torch.randn(20, 512, 2)
>>> outputs = model(past_values=past_values)
>>> labels = outputs.prediction_logits
```NTrÔ  r   rÉ   )r]  rg  rM   rN  )
r=   r½  rü   rÑ  rM  r   ÚCrossEntropyLossrf  rM   rN  )rJ   rÄ   rù  r´  rK  rP   r¶  rÚ  Úy_hatrÜ  r]  rø   s               r2   rc   Ú!PatchTSTForClassification.forwardd  sÕ   € ðX &1Ñ%<‘kÀ$Ç+Á+×B]ÑB]ˆà—z‘zØ#Ø1Ø!5Ø/Øð "ð 
ˆð —	‘	˜,×8Ñ8Ó9ˆàˆØÑ$Ü×&Ò&Ó(ˆDÙ˜EÓ1ˆHæØh ¨a°Ð!3Ñ3ˆGØ/7Ñ/Cxk GÑ+ˆGØˆNð JQˆGØˆNÜ.ØØ#Ø&×4Ñ4Ø#×.Ñ.ñ	
ð 	
r4   rß  rÃ  )re   rf   rg   rh   r   rB   r   r(   rm   r   rl   r   rn   rf  rc   ro   rp   rq   s   @r2   ró  ró  P  s©   ø† ð˜~÷ ð ð 15Ø-1Ø/3Ø,0Ø&*ñD
à—\‘\ðD
ð   §¡Ñ-ðD
ð % T™Nð	D
ð
 ' t™nðD
ð $ D™>ðD
ð ˜d‘^ðD
ð 
ˆuÐ5Ð5Ñ	6ôD
ó öD
r4   ró  z,
    The PatchTST for regression Model.
    c                   óZ   ^ • \ rS rSrSS\S\4U 4S jjjrS\R                  4S jr	Sr
U =r$ )	ÚPatchTSTPredictionHeadi¬  r=   rÀ   c                 óH  >• [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        UR
                  U l        U R
                  (       d  U R                  (       a  UR                  nOUR                  U-  nU R                  (       Gd]  [        R                  " 5       U l	        [        R                  " 5       U l
        [        R                  " 5       U l        [        U R                  5       Hõ  nU R                  R                  [        R                  " SS95        Uc:  U R                  R                  [        R                  " XAR                   5      5        O*U R                  R                  UR#                  U5      5        U R                  R                  UR$                  S:”  a   [        R&                  " UR$                  5      O[        R(                  " 5       5        M÷     g[        R                  " SS9U l        Uc&  [        R                  " XAR                   5      U l        OUR#                  U5      U l        UR$                  S:”  a   [        R&                  " UR$                  5      O[        R(                  " 5       U l        g)zè
num_patches (`int`):
    The number of patches in the input sequence.
distribution_output (`DistributionOutput`, *optional*):
    The distribution output layer for probabilistic forecasting. If None, a linear output layer is used.
r"   rä  Nr   )rA   rB   Úshare_projectionrö   r  ræ  ry   r   r  ÚprojectionsÚdropoutsÚflattensr  r©   rç  rE   Úprediction_lengthÚget_parameter_projectionrÈ  rã   rä   rè  Ú
projectionr   )rJ   r=   rÀ   Údistribution_outputrC   r$  rK   s         €r2   rB   ÚPatchTSTPredictionHead.__init__²  sÍ  ø€ ô 	‰ÑÔà &× 7Ñ 7ˆÔØ"(×";Ñ";ˆÔØ#×1Ñ1ˆÔØ"×/Ñ/ˆÔØ×× × 2× 2Ø—~‘~‰Hà—~‘~¨Ñ3ˆHà×$×$Ð$ä!Ÿ}š}›ˆDÔÜŸMšM›OˆDŒMÜŸMšM›OˆDŒMÜ˜4×2Ñ2Ö3Ø—‘×$Ñ$¤R§Z¢Z¸!Ñ%<Ô=Ø&Ñ.à×$Ñ$×+Ñ+¬B¯IªI°h×@XÑ@XÓ,YÕZð ×$Ñ$×+Ñ+Ð,?×,XÑ,XÐYaÓ,bÔcØ—‘×$Ñ$È×H[ÑH[Ð^_ÓH_¤R§Z¢Z°×0CÑ0CÔ%DÔeg×epÒepÓerÖsò 4ô Ÿ:š:°Ñ2ˆDŒLØ"Ñ*ä"$§)¢)¨H×6NÑ6NÓ"O•ð #6×"NÑ"NÈxÓ"X”Ø>D×>QÑ>QÐTUÓ>Uœ2Ÿ:š: f×&9Ñ&9Ô:Ô[]×[fÒ[fÓ[hˆDLr4   rË  c                 óú  • U R                   (       a  USS2SS2SSS24   nOLU R                  S:X  a  UR                  SS9nO,U R                  S:X  a  UR                  SS9R                  nOUnU R
                  (       dŽ  / n[        U R                  5       H]  nU R                  U   " USS2USS24   5      nU R                  U   " U5      nU R                  U   " U5      nUR                  U5        M_     [        R                  " USS9nO3U R                  U5      nU R                  U5      nU R!                  U5      n[#        U[$        5      (       a  [%        S U 5       5      nU$ UR'                  SS5      nU$ )	a2  
Parameters:
    embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
             `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
        Embedding from the model
Returns:
    `torch.Tensor` of shape `(bs, forecast_len, num_channels)`

Nr   r   r"   r#   r¿   r   c              3   óB   #   • U H  oR                  S S5      v •  M     g7f)r"   r   N)r*   )r¹  Úzs     r2   r»  Ú1PatchTSTPredictionHead.forward.<locals>.<genexpr>  s   é € Ð=±f°Ÿ;™; q¨!×,Ð,²fùs   ‚)r  ræ  r   r¿   rî  r  r  rö   r  r  r  r©   r(   r"  rè  r   r  r¥   rn   r*   )rJ   rË  rð  r€   r$  s        r2   rc   ÚPatchTSTPredictionHead.forwardÝ  sl  € ð ××à(ªªA¨q²!¨Ñ4Ñà× Ñ  FÓ*à#,§>¡>°a >Ð#8Ñ Ø×"Ñ" eÓ+à#,§=¡=°Q =Ð#7×#>Ñ#>Ñ ð $-Ð à×$×$ØˆFÜ˜4×2Ñ2Ö3à#'§=¡=°Ò#3Ð4DÂQÈÊ1ÀWÑ4MÓ#NÐ Ø#'§=¡=°Ò#3Ð4DÓ#EÐ ð $(×#3Ñ#3°AÒ#6Ð7GÓ#HÐ Ø—‘Ð.Ö/ñ 4ô —[’[ ¨QÑ/‰Fð  $Ÿ|™|Ð,<Ó=ÐØ#Ÿ|™|Ð,<Ó=Ðð —_‘_Ð%5Ó6ˆFäfœe×$Ñ$äÑ=±fÓ=Ó=ˆFð ˆð ×%Ñ% a¨Ó+ˆFØˆr4   )
r   r  rè  r  rö   ræ  r  r  r  r  rÏ   )re   rf   rg   rh   r   rj   rB   r(   rm   rc   ro   rp   rq   s   @r2   rÿ  rÿ  ¬  s5   ø† ñ)i˜~ð )i¸C÷ )ið )iðV1 §¡÷ 1ò 1r4   rÿ  z,
    The PatchTST for prediction model.
    c                   ó:  ^ • \ rS rSrS\4U 4S jjr     SS\R                  S\\R                     S\\R                     S\\	   S\\	   S	\\	   S
\
\\4   4S jjr\R                  " 5        SS\R                  S\\R                     S
\4S jj5       rSrU =r$ )ÚPatchTSTForPredictioni  r=   c                 ó‚  >• [         TU ]  U5        UR                  (       a  [        R	                  S5        SUl        [        U5      U l        UR                  S:X  a  S U l        O“UR                  S:X  a  [        UR                  S9U l        OjUR                  S:X  a  [        UR                  S9U l        OAUR                  S:X  a  [        UR                  S9U l        O[        SUR                   35      e[        XR                  R                  R                   U R                  S	9U l        U R%                  5         g )
Nrõ  FÚmseÚ	student_tr#   ÚnormalÚnegative_binomialúUnknown distribution output )r  )rA   rB   r°  rö  r÷  r­  rü   r]  r  r   r  r   r   rD   rÿ  r¯  rÀ   rÑ  rI  r|   s     €r2   rB   ÚPatchTSTForPrediction.__init__  s  ø€ Ü‰Ñ˜Ô ð ××ÜN‰NÐHÔIØ#(ˆFÔ ä" 6Ó*ˆŒ
à;‰;˜%ÓØ'+ˆDÕ$à×)Ñ)¨[Ó8Ü+9¸f×>VÑ>VÑ+WÕ(Ø×+Ñ+¨xÓ7Ü+7¸F×<TÑ<TÑ+UÕ(Ø×+Ñ+Ð/BÓBÜ+AÀf×F^ÑF^Ñ+_Õ(ä Ð#?À×@ZÑ@ZÐ?[Ð!\Ó]Ð]ä*Ø—J‘J×)Ñ)×5Ñ5È4×KcÑKcñ
ˆŒ	ð
 	‰Õr4   rÄ   r´  rµ  rK  rP   r¶  rQ   c           	      ó’  • Ub  UOU R                   R                  nU R                  UUUUSS9nU R                  UR                  5      nSn	U R
                  (       a  Un
OX‡R                  -  UR                  -   n
Ubr  U R
                  (       aE  U R
                  R                  X‡R                  UR                  S9n[        X³5      n	[        U	5      n	O[        R                  " SS9nU" X£5      n	UR                  nUR                  nU(       d  U
4USS -   nU	b  U	4U-   nU$ UnU$ [        U	U
UR                  UR                  UUS	9$ )
a  
Parameters:
    past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
        Input sequence to the model
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
    future_values (`torch.Tensor` of shape `(bs, forecast_len, num_input_channels)`, *optional*):
        Future target values associated with the `past_values`
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers
    output_attentions (`bool`, *optional*):
        Whether or not to return the output attention of all layers
    return_dict (`bool`, *optional*):
        Whether or not to return a `ModelOutput` instead of a plain tuple.

Returns:
    `PatchTSTForPredictionOutput` or tuple of `torch.Tensor` (if `return_dict`=False or
    `config.return_dict`=False)

Examples:

```python
>>> from huggingface_hub import hf_hub_download
>>> import torch
>>> from transformers import PatchTSTConfig, PatchTSTForPrediction

>>> file = hf_hub_download(
...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
... )
>>> batch = torch.load(file)

>>> # Prediction task with 7 input channels and prediction length is 96
>>> model = PatchTSTForPrediction.from_pretrained("namctin/patchtst_etth1_forecast")

>>> # during training, one provides both past and future values
>>> outputs = model(
...     past_values=batch["past_values"],
...     future_values=batch["future_values"],
... )

>>> loss = outputs.loss
>>> loss.backward()

>>> # during inference, one only provides past values, the model outputs future values
>>> outputs = model(past_values=batch["past_values"])
>>> prediction_outputs = outputs.prediction_outputs
```NTrÔ  ©rX  rY  r   rÖ  r   r    )r]  rd  rM   rN  rX  rY  )r=   r½  rü   rÑ  rM  r  rY  rX  Údistributionro  rz  r   rÙ  rc  rM   rN  )rJ   rÄ   r´  rµ  rK  rP   r¶  rÚ  rü  rÜ  Ú	y_hat_outr  r]  rX  rY  rø   s                   r2   rc   ÚPatchTSTForPrediction.forward4  sf  € ðz &1Ñ%<‘kÀ$Ç+Á+×B]ÑB]ˆð —z‘zØ#Ø1Ø!5Ø/Øð "ð 
ˆð —	‘	˜,×8Ñ8Ó9ˆàˆà×#×#Ø‰Ià× 2Ñ 2Ñ2°\×5EÑ5EÑEˆIàÑ$Ø×'×'Ø#×7Ñ7×DÑDØ×/Ñ/°|×7IÑ7Ið  Eð  ô ˜|Ó;ä+¨HÓ5‘ä—z’z¨FÑ3Ù 	Ó9à×ÑˆØ×"Ñ"ˆæØ l \°!°BÐ%7Ñ7ˆGØ/7Ñ/Cxk GÑ+ˆGØˆNð JQˆGØˆNÜ*ØØ(Ø&×4Ñ4Ø#×.Ñ.ØØñ
ð 	
r4   c                 ó   • U R                   R                  nU " USUSS9nU R                  (       av  U R                  R                  UR                  UR
                  UR                  S9n[        U5       Vs/ sH  oeR                  5       PM     nn[        R                  " USS9nOUR                  R                  S5      n[        US9$ s  snf )a°  
Generate sequences of sample predictions from a model with a probability distribution head.

Parameters:
    past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Past values of the time series that serves as context in order to predict the future.
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

Return:
    [`SamplePatchTSTOutput`] where the outputs `sequences` tensor will have shape `(batch_size, number of
    samples, prediction_length, 1)` or `(batch_size, number of samples, prediction_length, num_input_channels)`
    for multivariate predictions.
NF)rÄ   rµ  r´  rK  r  r   r#   ©rj  )r=   Únum_parallel_samplesr  r  rd  rX  rY  r  Úsampler(   r"  r‘   ri  ©rJ   rÄ   r´  r  rø   r  r¬   Úsampless           r2   ÚgenerateÚPatchTSTForPrediction.generate¡  sÄ   € ð2  $Ÿ{™{×?Ñ?Ðñ Ø#ØØ1Ø!&ñ	
ˆð ×#×#à×3Ñ3×@Ñ@Ø×*Ñ*°·±À7Ç=Á=ð Að ˆLô 7<Ð<PÔ6QÓRÑ6Q°×*Ñ*Ö,Ñ6QˆGÐRä—k’k '¨qÑ1‰Gà×0Ñ0×:Ñ:¸1Ó=ˆGä#¨gÑ6Ð6ùò Ss   Á7C©r  rÑ  rü   rÃ  rÏ   )re   rf   rg   rh   r   rB   r(   rm   r   rl   r   rn   rc  rc   Úno_gradri  r#  ro   rp   rq   s   @r2   r  r    sè   ø† ð˜~÷ ð@ 6:Ø04Ø/3Ø,0Ø&*ñk
à—\‘\ðk
ð % U§\¡\Ñ2ðk
ð   §¡Ñ-ð	k
ð
 ' t™nðk
ð $ D™>ðk
ð ˜d‘^ðk
ð 
ˆuÐ1Ð1Ñ	2õk
ðZ ‡]‚]ƒ_ð 6:ñ-7à—\‘\ð-7ð % U§\¡\Ñ2ð-7ð 
ô	-7ó ö-7r4   r  c                   óZ   ^ • \ rS rSrSrSS\4U 4S jjjrS\R                  4S jr	Sr
U =r$ )	ÚPatchTSTRegressionHeadiÒ  z
Regression head
r=   c                 ó
  >• [         TU ]  5         UR                  U l        UR                  U l        UR
                  U l        X l        UR                  UR                  -  n[        R                  " SS9U l        UR                  S:”  a   [        R                  " UR                  5      O[        R                  " 5       U l        Uc&  [        R                   " X1R"                  5      U l        g UR'                  U5      U l        g rã  )rA   rB   Úoutput_rangeÚy_ranger  ræ  r  rö   ry   r   rç  rè  rÈ  rã   rä   r   rE   ré  r  r  )rJ   r=   r  rC   rK   s       €r2   rB   ÚPatchTSTRegressionHead.__init__×  s¿   ø€ Ü‰ÑÔØ×*Ñ*ˆŒØ#×1Ñ1ˆÔØ"×/Ñ/ˆÔØ#6Ô à×,Ñ,¨v¯~©~Ñ=ˆä—z’z¨AÑ.ˆŒØ:@×:MÑ:MÐPQÓ:Q”r—z’z &×"5Ñ"5Ô6ÔWY×WbÒWbÓWdˆŒàÑ&Ü Ÿiši¨×2DÑ2DÓEˆDOà1×JÑJÈ8ÓTˆDOr4   rË  c                 ó@  • U R                   (       a  USS2SS2SSS24   nOcU R                  S:X  a  UR                  SS9nOCU R                  S:X  a  UR                  SS9R                  nO[        SU R                   S35      eU R                  U R                  U5      5      nU R                  U5      nU R                  SL U R                  SL-  (       aF  [        R                  " U5      U R                  S	   U R                  S   -
  -  U R                  S   -   nU$ )
a!  
Parameters:
    embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
            `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
        Embedding from the model
Returns:
    `torch.Tensor` of shape `(bs, output_dim)`

Nr   r   r"   r#   r¿   rì  rí  r   )r  ræ  r   r¿   rî  rD   r   rè  r  r  r+  r(   Úsigmoidrï  s       r2   rc   ÚPatchTSTRegressionHead.forwardè  s  € ð ××à(ªªA¨q²!¨Ñ4ÑØ×Ñ &Ó(à(Ÿ~™~°!˜~Ð4ÑØ×Ñ %Ó'à(Ÿ}™}°˜}Ð3×:Ñ:ÑäÐ0°×1BÑ1BÐ0CÐCZÐ[Ó\Ð\ð  Ÿ<™<¨¯©Ð5EÓ(FÓGÐð —‘Ð!1Ó2ˆà×$Ñ$¨Ð,°·±ÀTÐ1I×JÜ—]’] 6Ó*¨d¯l©l¸1©oÀÇÁÈQÁÑ.OÑPÐSW×S_ÑS_Ð`aÑSbÑbˆFØˆr4   )r  r   rè  ræ  r  r  r+  rÏ   r‚   rq   s   @r2   r(  r(  Ò  s1   ø† ññU˜~÷ Uð Uð" §¡÷ ò r4   r(  z,
    The PatchTST for regression model.
    c                   óD  ^ • \ rS rSrS\4U 4S jjr\     SS\R                  S\	\R                     S\	\R                     S\	\
   S\	\
   S	\	\
   S
\\\4   4S jj5       r\R                  " 5        SS\R                  S\	\R                     S
\4S jj5       rSrU =r$ )ÚPatchTSTForRegressioni	  r=   c                 óH  >• [         TU ]  U5        UR                  (       a  [        R	                  S5        SUl        [        U5      U l        UR                  S:X  a  S U l        O“UR                  S:X  a  [        UR                  S9U l        OjUR                  S:X  a  [        UR                  S9U l        OAUR                  S:X  a  [        UR                  S9U l        O[        SUR                   35      e[        XR                  5      U l        U R!                  5         g )	Nrõ  Fr  r  r#   r  r  r  )rA   rB   r°  rö  r÷  r­  rü   r]  r  r   ré  r   r   rD   r(  rÑ  rI  r|   s     €r2   rB   ÚPatchTSTForRegression.__init__  sì   ø€ Ü‰Ñ˜Ô ð ××ÜN‰NÐHÔIØ#(ˆFÔ ä" 6Ó*ˆŒ
Ø;‰;˜%ÓØ'+ˆDÕ$à×)Ñ)¨[Ó8Ü+9¸f×>PÑ>PÑ+QÕ(Ø×+Ñ+¨xÓ7Ü+7¸F×<NÑ<NÑ+OÕ(Ø×+Ñ+Ð/BÓBÜ+AÀf×FXÑFXÑ+YÕ(ä Ð#?À×@ZÑ@ZÐ?[Ð!\Ó]Ð]ä*¨6×3KÑ3KÓLˆŒ	ð 	‰Õr4   rÄ   rù  r´  rK  rP   r¶  rQ   c           	      ó`  • Ub  UOU R                   R                  nU R                  UUUUSS9nU R                  UR                  5      nSn	Ub  U R
                  (       ap  U R
                  R                  U5      n
[        U Vs/ sH(  o»R                  SU R                   R                  5      PM*     sn5      n[        X¢5      n	[        U	5      n	O[        R                  " SS9n	U	" X‚5      n	U(       d  U4USS -   nU	b  U	4U-   nU$ UnU$ [        U	UUR                  UR                   S	9$ s  snf )
aƒ  
past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
    Input sequence to the model
target_values (`torch.Tensor` of shape `(bs, num_input_channels)`):
    Target values associates with the `past_values`
past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
    Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
    in `[0, 1]`:

    - 1 for values that are **observed**,
    - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
    Whether or not to return a `ModelOutput` instead of a plain tuple.

Examples:

```python
>>> from transformers import PatchTSTConfig, PatchTSTForRegression

>>> # Regression task with 6 input channels and regress 2 targets
>>> model = PatchTSTForRegression.from_pretrained("namctin/patchtst_etth1_regression")

>>> # during inference, one only provides past values, the model outputs future values
>>> past_values = torch.randn(20, 512, 6)
>>> outputs = model(past_values=past_values)
>>> regression_outputs = outputs.regression_outputs
```NTrÔ  r    r   rÖ  r   rÉ   )r]  ra  rM   rN  )r=   r½  rü   rÑ  rM  r  r  rn   r-   ré  ro  rz  r   rÙ  r`  rM   rN  )rJ   rÄ   rù  r´  rK  rP   r¶  rÚ  rü  r]  r  Úitemrø   s                r2   rc   ÚPatchTSTForRegression.forward)  s=  € ðJ &1Ñ%<‘kÀ$Ç+Á+×B]ÑB]ˆà—z‘zØ#Ø1Ø!5Ø/Øð "ð 
ˆð —	‘	˜,×8Ñ8Ó9ˆàˆØÑ$Ø×'×'Ø#×7Ñ7×DÑDÀUÓKäÑRWÓXÑRWÈ$Ÿy™y¨¨T¯[©[×-DÑ-DÖEÑRWÑXÓYÜ˜<Ó7ä'¨Ó-‘ä—z’z¨FÑ3Ù˜EÓ1æàh ¨a°Ð!3Ñ3ˆGØ+/Ñ+;tg Ñ'ˆGØˆNð BIˆGØˆNÜ*ØØ$Ø&×4Ñ4Ø#×.Ñ.ñ	
ð 	
ùò Ys   Â.D+c                 óf  • U R                   R                  nU " USUSS9nU R                  R                  UR                  5      n[        U5       Vs/ sH  oeR                  5       PM     nn[        R                  " USS9R                  SX0R                   R                  5      n[        US9$ s  snf )a:  
Generate sequences of sample predictions from a model with a probability distribution head.

Parameters:
    past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Past values of the time series that serves as context in order to predict the future.
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

Return:
    [`SamplePatchTSTOutput`] where the outputs `sequences` tensor will have shape `(batch_size, number of
    samples, num_targets)`.
NF)rÄ   rù  r´  rK  r   r#   r    r  )r=   r  r  r  ra  r  r   r(   r"  r-   ré  ri  r!  s           r2   r#  ÚPatchTSTForRegression.generates  s¦   € ð0  $Ÿ{™{×?Ñ?Ðñ Ø#ØØ1Ø!&ñ	
ˆð ×/Ñ/×<Ñ<¸W×=WÑ=WÓXˆä27Ð8LÔ2MÓNÑ2M¨Q×&Ñ&Ö(Ñ2MˆÐNä—+’+˜g¨1Ñ-×2Ñ2°2Ð7KÏ[É[×MdÑMdÓeˆÜ#¨gÑ6Ð6ùò Os   ÁB.r%  rÃ  rÏ   )re   rf   rg   rh   r   rB   r   r(   rm   r   rl   r   rn   r`  rc   r&  ri  r#  ro   rp   rq   s   @r2   r1  r1  	  s÷   ø† ð˜~÷ ð4 ð 15Ø59Ø/3Ø,0Ø&*ñG
à—\‘\ðG
ð   §¡Ñ-ðG
ð % U§\¡\Ñ2ð	G
ð
 ' t™nðG
ð $ D™>ðG
ð ˜d‘^ðG
ð 
ˆuÐ1Ð1Ñ	2ôG
ó ðG
ðR ‡]‚]ƒ_ð 6:ñ'7à—\‘\ð'7ð % U§\¡\Ñ2ð'7ð 
ô	'7ó ö'7r4   r1  )r­  rû   r  rÏ  r1  ró  )NrT   N)NFr   rÇ  rT  )Lri   r6  Údataclassesr   Útypingr   r   r   r(   r   Úactivationsr	   Úmodeling_flash_attention_utilsr
   Úmodeling_outputsr   Úmodeling_utilsr   r   Úprocessing_utilsr   Útime_series_utilsr   r   r   Úutilsr   r   r   Úconfiguration_patchtstr   Ú
get_loggerre   rö  r  rm   rk   r3   r6   rs   Úlistrl   rj   r   r¸   rº   rÍ   rÛ   rû   r  r  r  rW  r\  r`  rc  rf  ri  ÚdistributionsÚDistributionro  rz  r|  rŽ  rž  r¥  r­  rÅ  rÏ  rá  ró  rÿ  r  r(  r1  Ú__all__r¡   r4   r2   Ú<module>rH     sÝ  ðñ ã Ý !ß ,Ñ ,ã Ý å "Ý BÝ /ß FÝ &ß UÑ Uß 9Ñ 9Ý 2ð 
×	Ò	˜HÓ	%€ð  $ØØ(,ñ%ØI‰Ið%à<‰<ð%ð 
‰ð%ð <‰<ð	%ð
 ˜UŸ\™\Ñ*ð%ð e‰_ð%ð ð%ð ˜Ÿ™Ñ%õ%ô>U/˜Ÿ	™	ô U/ôp&˜Ÿ	™	ô &ð2 04Ø',Øñ7%ØL‰Lð7%àð7%ð ' t™nð7%ð !%ð	7%ð
 õ7%ðz 04Øñ	A%ØL‰LðA%à$ T¨3 YÑ/ðA%ð ' t™nðA%ð õ	A%ôH-r—y‘yô -ô`9"b—i‘iô 9"ôxH˜2Ÿ9™9ô HðV ô"2˜oó "2ó ð"2ôJ!˜Ÿ	™	ô !ôH5 §¡ô 5ôp;xÐ-ô ;xð| Ùðñô
4˜+ó 4óó ð4ð6 Ùðñô
: ;ó :óó ð:ð Ùðñô
: +ó :óó ð:ð Ùðñô
. +ó .óó ð.ð4 Ùðñô
: kó :óó ð:ð Ùðñô2˜;ó 2óó ð2ð#ˆu×"Ñ"×/Ñ/ð #¸¿¹ð #È%Ï,É,ô #ñ* 5§<¡<ð *¸(À5Ç<Á<Ñ:Pð *Ðfk×frÑfrõ *ô2 0˜Ÿ	™	ô  0ôH3;˜Ÿ™ô 3;ôn ˜Ÿ	™	ô  ô6 R—Y‘Yô  ð8 ôm
Ð+ó m
ó ðm
ô`˜rŸy™yô ñ8 ðñô
l
Ð4ó l
óð
l
ô^" §¡ô "ñJ ðñô
T
Ð 7ó T
óð
T
ñn ðñô
]˜RŸY™Yó ]óð
]ñ@ ðñô
y7Ð3ó y7óð
y7ôx4˜RŸY™Yô 4ñn ðñô
M7Ð3ó M7óð
M7ò`r4   