
    <hJ                       S r SSKrSSKJr  SSKJrJrJr  SSKrSSKJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJrJr  SSKJr  SSKJrJrJr  SSKJrJrJr  SSKJr  \R>                  " \ 5      r!   SmS\	RD                  S\RF                  S\RF                  S\RF                  S\\RF                     S\\$   S\$S\\RF                     4S jjr% " S S\	RD                  5      r& " S S\	RD                  5      r'   SnS\RF                  S\$S\\(   S \)S!\*4
S" jjr+  SoS\RF                  S#\\(\*4   S\\(   S!\*4S$ jjr, " S% S&\	RD                  5      r- " S' S(\	RD                  5      r. " S) S*\	RD                  5      r/\ " S+ S,\5      5       r0 " S- S.\	RD                  5      r1 " S/ S0\	RD                  5      r2 " S1 S2\05      r3\\" S3S49 " S5 S6\5      5       5       r4\\" S7S49 " S8 S9\5      5       5       r5\\" S:S49 " S; S<\5      5       5       r6\\" S=S49 " S> S?\5      5       5       r7\\" S@S49 " SA SB\5      5       5       r8\\" SCS49 " SD SE\5      5       5       r9SF\Rt                  Rv                  SG\RF                  SH\RF                  4SI jr<SpSJ\RF                  SK\\RF                     SH\RF                  4SL jjr= " SM SN\	RD                  5      r> " SO SP\	RD                  5      r? " SQ SR\	RD                  5      r@ " SS ST\	RD                  5      rA\ " SU SV\05      5       rB " SW SX\	RD                  5      rC\" SYS49 " SZ S[\05      5       rD " S\ S]\	RD                  5      rE\" S^S49 " S_ S`\05      5       rF\" SaS49 " Sb Sc\	RD                  5      5       rG\" SdS49 " Se Sf\05      5       rH " Sg Sh\	RD                  5      rI\" SiS49 " Sj Sk\05      5       rJ/ SlQrKg)qzPyTorch PatchTST model.    N)	dataclass)CallableOptionalUnion)nn   )ACT2CLS)FlashAttentionKwargs)BaseModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)NegativeBinomialOutputNormalOutputStudentTOutput)ModelOutputauto_docstringlogging   )PatchTSTConfigmodulequerykeyvalueattention_maskscalingdropout	head_maskc                    Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  n	Ub  X-   n	[        R
                  R                  U	SS9n	Ub  XR                  SSSS5      -  n	[        R
                  R                  XU R                  S9n	[        R                  " X5      n
U
R                  SS5      R                  5       n
X4$ )N         r   dimr   )ptraining)sizetorchmatmul	transposer   
functionalsoftmaxviewr   r&   
contiguous)r   r   r   r   r   r   r   r   kwargsattn_weightsattn_outputs              f/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/patchtst/modeling_patchtst.pyeager_attention_forwardr3   &   s     **R.D(<<}}Q':;gEL!#4==((2(>L#nnQAq&AA==((6??([L,,|3K''1-88:K$$    c                   Z  ^  \ rS rSrSr     SS\S\S\S\S\S\S	\\	   4U 4S
 jjjr
    SS\R                  S\\R                     S\\R                     S\\R                     S\\   S\\   S\\R                  \\R                     \\\R                        4   4S jjrSrU =r$ )PatchTSTAttentionE   z=Multi-headed attention from 'Attention Is All You Need' paper	embed_dim	num_headsr   
is_decoderbias	is_causalconfigc                   > [         TU ]  5         Xl        X l        X0l        X-  U l        Xpl        U R
                  U-  U R                  :w  a  [        SU R                   SU S35      eU R
                  S-  U l        X@l	        X`l
        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: ).r!   r;   )super__init__r8   r9   r   head_dimr=   
ValueErrorr   r:   r<   r   Lineark_projv_projq_projout_proj)	selfr8   r9   r   r:   r;   r<   r=   	__class__s	           r2   rB   PatchTSTAttention.__init__H   s     	""!.MMI%$..8MdnnM]$YKr3  }}d*$"ii	4@ii	4@ii	4@		)TBr4   hidden_stateskey_value_statesr   layer_head_maskoutput_attentionsr/   returnc                     USLnUR                   SS u  pU(       a  UR                   S   OU	n
XSU R                  4nXSU R                  4nU R                  U5      R                  " U6 R	                  SS5      nU(       a  UOUnU R                  U5      R                  " U6 R	                  SS5      nU R                  U5      R                  " U6 R	                  SS5      n[        nU R                  R                  S:w  a  [        U R                  R                     nU" U UUUU4U R                  (       d  SOU R                  U R                  UUS.UD6u  nnUR                  XS5      R                  5       nU R!                  U5      nUUS4$ )z#Input shape: Batch x Time x ChannelNr    r   r"   eager        )r   r   rP   r   )shaperC   rH   r-   r*   rF   rG   r3   r=   _attn_implementationr   r&   r   r   reshaper.   rI   )rJ   rM   rN   r   rO   rP   r/   is_cross_attentionbsztgt_lensrc_lenq_input_shapekv_input_shapequery_statescurrent_states
key_statesvalue_statesattention_interfacer1   r0   s                       r2   forwardPatchTSTAttention.forwardg   s    .T9 %**3B//A"((+wr4==9DMM: {{=166FPPQRTUV-?)][[055~FPPQRTUV
{{>277HRRSTVWX(?;;++w6"9$++:Z:Z"[$7%
  $}}C$,,LL/%%
 %
!\ "))#;FFHmmK0L$..r4   )r=   r   r8   rC   r<   r:   rF   r9   rI   rH   r   rG   )rT   FTFN)NNNF)__name__
__module____qualname____firstlineno____doc__intfloatboolr   r   rB   r(   Tensorr   r
   tuplerc   __static_attributes____classcell__rK   s   @r2   r6   r6   E   s    G  +/CC C 	C
 C C C (C CD 481526,13/||3/ #5<<03/ !.	3/
 "%,,/3/ $D>3/ -.3/ 
u||Xell3XeELL>Q5RR	S3/ 3/r4   r6   c                   V   ^  \ rS rSrSrS\4U 4S jjrS\R                  4S jr	Sr
U =r$ )PatchTSTBatchNorm   zH
Compute batch normalization over the sequence length (time) dimension.
r=   c                 ~   > [         TU ]  5         [        R                  " UR                  UR
                  S9U l        g )Neps)rA   rB   r   BatchNorm1dd_modelnorm_eps	batchnormrJ   r=   rK   s     r2   rB   PatchTSTBatchNorm.__init__   s(    FOOLr4   inputsc                 l    UR                  SS5      nU R                  U5      nUR                  SS5      $ )z
Parameters:
    inputs (`torch.Tensor` of shape `(batch_size, sequence_length, d_model)`):
        input for Batch norm calculation
Returns:
    `torch.Tensor` of shape `(batch_size, sequence_length, d_model)`
r   r"   )r*   r{   )rJ   r~   outputs      r2   rc   PatchTSTBatchNorm.forward   s7     !!!Q''1%%r4   )r{   re   rf   rg   rh   ri   r   rB   r(   rm   rc   ro   rp   rq   s   @r2   rs   rs      s+    M~ M
&ell 
& 
&r4   rs   r~   
mask_ratiounmasked_channel_indiceschannel_consistent_masking
mask_valuec                    US:  d  US:  a  [        SU S35      eU R                  u  pVpxU R                  n	[        USU-
  -  5      n
U(       a*  [        R
                  " USXyS9nUR                  SUS5      nO[        R
                  " XVXyS9n[        R                  " XVXyS9nSUSS2SS2SU
24'   [        R                  " USS9n[        R                  " USS9n[        R                  " USUS	9nUR                  S5      R                  SSSU5      nUb  SUSS2USS2SS24'   U R                  UR                  5       U5      nXS
   4$ )a  random_masking: Mask the input considering the control variables.

Args:
    inputs (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, num_features)`):
        The input tensor to mask.
    mask_ratio (`float`):
        Masking ratio applied to mask the input data during random pretraining. It is the number between 0 and 1.
    unmasked_channel_indices (list, *optional*):
        Indices of channels that will not be masked.
    channel_consistent_masking (bool, *optional*, defaults to `False`):
        When true, masking will be same across all channels of a timeseries. Otherwise, masking positions will vary
        across channels.
    mask_value (int, *optional*, defaults to 0):
        Define the value of masked patches for pretraining.

Returns:
    `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as input Tensor and mask tensor of shape [bs x c x
    n]
r   r   zMask ratio z has to be between 0 and 1.deviceNr    r#   )r$   index.r   )rD   rU   r   rj   r(   randrepeatonesargsortgather	unsqueezemasked_fillrl   )r~   r   r   r   r   
batch_sizenum_channelssequence_lengthnum_featuresr   len_keepnoisemaskids_shuffleids_restoreinputs_masks                   r2   random_maskingr      sA   4 A~q;zl2MNOO>Dll;Jo]]F?a*n56H!

:q/IQa0 

:_T ::jODDAyy --2.K--4K<<"K8D>>"$$Q1l;D+23Q(!Q./$$TYY[*=KV$$r4   num_forecast_mask_patchesc                 N   [        U[        5      (       a  U/nU Vs/ sH  nSPM     nnU R                  u  pgp[        R                  " XgXR
                  S9n
/ nSn[        U5      n[        X5       HG  u  pUS::  d  X:  a  [        SU S35      e[        Xo-  U-  5      nUR                  XU/5        UU-  nMI     [        US S9nX:  a  US   S   Xl-
  -   US   S'   OX:  a  US	   S   X-
  -   US	   S'   SnU H  u  nnnUU-   nSU
UU2S
S
2U* S
24'   UnM     [        R                  " U
R                  S   5      nU
U   n
U
R                  S	5      R                  SSSU	5      n
Ub  SU
S
S
2US
S
2S
S
24'   U R                  U
R                  5       U5      nUU
S   4$ s  snf )ai  Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches.
If num_forecast_mask_patches is a list, samples in the batch will be randomly masked by numbers defined in the list.

Parameters:
    inputs (`torch.Tensor`):
        Input of shape `(bs, num_channels, num_patch, patch_length)`
    num_forecast_mask_patches (`list`):
        Number of patches to be masked at the end of each batch sample. e.g. 4 or [3, 5].
    unmasked_channel_indices (`list`, *optional*):
        Indices of channels that are not masked.
    mask_value (`int`, *optional*, defaults to 0):
        Values in the masked patches will be filled by `mask_value`.

Returns:
    `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as inputs Tensor and Mask tensor of shape `(bs,
    num_channels , num_patch)` or `(bs, tsg1, tsg2, num_channels, num_patch)`
r   r   r   znum_forecast_mask_patches z6 should be greater than 0 and less than total patches.c                     U S   $ )Nr"    )xs    r2   <lambda>"forecast_masking.<locals>.<lambda>  s    !A$r4   )r   r"   r    Nr   )
isinstancerj   rU   r(   zerosr   sumziprD   appendsortedrandpermr   r   r   rl   )r~   r   r   r   _forecast_mask_ratiosr   r   r   r   r   t_listtotal_lengthtotal_ratiopatch_lengthratiotemp_lenbatch1	patch_lenbatch2permr   s                         r2   forecast_maskingr      s   0 +S11%>$?!'@A'@!A'@A>Dll;Jo;;zWDFL*+K"#<S1 ?,\N:pq  z)K78|H56   T F/F ay|z'@Aq	!		"r
1)BCr
1F"(	1h("./VF]A	z{*+ #)
 >>$**Q-(D:D>>"$$Q1l;D+23Q(!Q./$$TYY[*=KV$$O Bs   F"c                   V   ^  \ rS rSrSrS\4U 4S jjrS\R                  4S jr	Sr
U =r$ )PatchTSTPatchifyi1  z
A class to patchify the time series sequence into different patches

Returns:
    `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
r=   c                   > [         TU ]  5         UR                  U l        UR                  U l        UR
                  U l        U R                  U R                  ::  a&  [        SU R                   SU R                   S35      e[        U R                  U R                  5      U R                  -
  U R
                  -  S-   U l        U R                  U R
                  U R                  S-
  -  -   nU R                  U-
  U l	        g )NzSequence length (z+) has to be greater than the patch length ()r   )
rA   rB   context_lengthr   r   patch_striderD   maxnum_patchessequence_start)rJ   r=   new_sequence_lengthrK   s      r2   rB   PatchTSTPatchify.__init__9  s    %44"//"//4#4#44#D$8$8#99deievevdwwxy 
   4 4d6G6GH4K\K\\aeararruvv"//$2C2CtGWGWZ[G[2\\"225HHr4   past_valuesc                 4   UR                   S   nX R                  :w  a  [        SU SU R                   S35      eUSS2U R                  S2SS24   nUR	                  SU R
                  U R                  S9nUR                  SS5      R                  5       nU$ )z
Parameters:
    past_values (`torch.Tensor` of shape `(batch_size, sequence_length, num_channels)`, *required*):
        Input for patchification

Returns:
    `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
zInput sequence length (z%) doesn't match model configuration (r?   N)	dimensionr'   step)	rU   r   rD   r   unfoldr   r   r*   r.   )rJ   r   r   r   s       r2   rc   PatchTSTPatchify.forwardJ  s     &++B/222)/)::_`d`t`t_uuwx  Q 3 3 5q89$2C2C$J[J[\!!"b)446r4   )r   r   r   r   r   r   rq   s   @r2   r   r   1  s+    I~ I"5<<  r4   r   c                   V   ^  \ rS rSrSrS\4U 4S jjrS\R                  4S jr	Sr
U =r$ )PatchTSTMaskingia  al  
Class to perform random or forecast masking.

Parameters:
    config (`PatchTSTConfig`): model config
Returns:
    x_mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
        Masked patched input
    mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
        Bool tensor indicating True on masked points
r=   c                 >  > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l        U R                  b  [        U R                  5      U l        g g N)	rA   rB   random_mask_ratior   	mask_typer   r   r   r   r|   s     r2   rB   PatchTSTMasking.__init__n  s    !'!9!9*0*K*K')))/)I)I&(.(G(G% ++((4,243P3P,QD) 5r4   patch_inputc                 d   U R                   S:X  a8  [        UU R                  U R                  U R                  U R
                  S9u  p#OVU R                   S:X  a-  [        UU R                  U R                  U R
                  S9u  p#O[        SU R                    S35      eUR                  5       nX#4$ )a  
Parameters:
    patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
        Patch input

Return:
    masked_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
        Masked patched input
    mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
        Bool tensor indicating True on masked points

random)r~   r   r   r   r   forecast)r~   r   r   r   zInvalid mask type .)
r   r   r   r   r   r   r   r   rD   rl   )rJ   r   masked_inputr   s       r2   rc   PatchTSTMasking.forwardy  s     >>X%!/"11)-)F)F+/+J+J??"L$ ^^z)!1"*.*H*H)-)F)F??	"L$ 1$..1ACDD yy{!!r4   )r   r   r   r   r   r   r   rq   s   @r2   r   r   a  s+    
	R~ 	R!"5<< !" !"r4   r   c                   d   ^  \ rS rSrSrS\4U 4S jjrS	S\R                  S\	\
   4S jjrSrU =r$ )
PatchTSTEncoderLayeri  z
PatchTST encoder layer
r=   c                 (  > [         TU ]  5         UR                  U l        [        UR                  UR
                  UR                  US9U l        UR                  S:  a   [        R                  " UR                  5      O[        R                  " 5       U l        UR                  S:X  a  [        U5      U l        OWUR                  S:X  a/  [        R                   " UR                  UR"                  S9U l        O[%        UR                   S35      eU R                  (       a  UR                  S:  a   [        R                  " UR                  5      O[        R                  " 5       U l        UR                  S:X  a  [        U5      U l        OWUR                  S:X  a/  [        R                   " UR                  UR"                  S9U l        O[%        UR                   S35      e[        R*                  " [        R,                  " UR                  UR.                  UR0                  S9[2        UR4                     " 5       UR6                  S:  a   [        R                  " UR6                  5      O[        R                  " 5       [        R,                  " UR.                  UR                  UR0                  S95      U l        UR                  S:  a   [        R                  " UR                  5      O[        R                  " 5       U l        UR                  S:X  a  [        U5      U l        OWUR                  S:X  a/  [        R                   " UR                  UR"                  S9U l        O[%        UR                   S35      eUR>                  U l        g )N)r8   r9   r   r=   r   r{   	layernormrv   z$ is not a supported norm layer type.r@   ) rA   rB   channel_attentionr6   ry   num_attention_headsattention_dropout	self_attnpath_dropoutr   DropoutIdentitydropout_path1	norm_typers   norm_sublayer1	LayerNormrz   rD   dropout_path2norm_sublayer2
SequentialrE   ffn_dimr;   r	   activation_function
ff_dropoutffdropout_path3norm_sublayer3pre_normr|   s     r2   rB   PatchTSTEncoderLayer.__init__  s   !'!9!9*nn00,,	
 AG@S@SVW@WRZZ(;(;<]_]h]h]j{*"3F";D,"$,,v~~6??"SD 0 011UVWW !!DJDWDWZ[D[F,?,?!@acalalanD;.&7&?#!![0&(ll6>>v&W# F$4$4#55Y!Z[[ --IIfnnfnn6;;GF../1-3->->-BBJJv(()IIfnnfnn6;;G	
 AG@S@SVW@WRZZ(;(;<]_]h]h]j{*"3F";D,"$,,v~~6??"SD 0 011UVWWr4   hidden_staterP   c                    UR                   u  p4pVUR                  X4-  XV5      nU R                  (       a6  U R                  U R	                  U5      US9u  pxn	XR                  U5      -   nO4U R                  XS9u  pxn	U R	                  XR                  U5      -   5      nUR                  X4XV5      nU R                  (       a  UR                  SS5      R                  5       nUR                  X5-  XF5      nU R                  (       a6  U R                  U R                  U5      US9u  pzn	XR                  U5      -   nO4U R                  XS9u  pzn	U R                  XR                  U5      -   5      nUR                  X5XF5      nUR                  SS5      R                  5       nUR                  X4-  XV5      nU R                  (       a2  XR                  U R                  U R                  U5      5      5      -   nO1U R                  XR                  U R                  U5      5      -   5      nUR                  X4XV5      nU4nU(       a  XR                  (       a  UW
4OU4-  nU$ )ao  
Parameters:
    hidden_state (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, d_model)`, *required*):
        Past values of the time series
    output_attentions (`bool`, *optional*):
        Whether or not to return the output attention of all layers
Return:
    `torch.Tensor` of shape `(batch_size, num_channels, sequence_length, d_model)`

)rM   rP   r"   r   )rU   r-   r   r   r   r   rW   r   r*   r.   r   r   r   r   r   )rJ   r   rP   r   num_input_channelsr   ry   r1   r0   r   channel_attn_weightsoutputss               r2   rc   PatchTSTEncoderLayer.forward  si    DPCUCU@
 $(()H/c==+/>>"11,?Sd ,: ,(Kq (*<*<[*IIL ,0>>* ,: ,(Kq  ..|>P>PQ\>]/]^L $++JOe !!'11!Q7BBDL',,Z-IK]gL}}7;~~"&"5"5l"CWh 8F 841  ,.@.@.MM 8<~~". 8F 841  $22<BTBTU`Ba3ab (//
M_iL'11!Q7BBDL $(()H/c== (*<*<TWWTEXEXYeEf=g*hhL  ..|>P>PQUQXQXYeQf>g/ghL $++JOe/?U?U&:;\h[jjGr4   )
r   r   r   r   r   r   r   r   r   r   r   )re   rf   rg   rh   ri   r   rB   r(   rm   r   rl   rc   ro   rp   rq   s   @r2   r   r     s9    0(~ 0(dQELL QXd^ Q Qr4   r   c                   X    \ rS rSr% \\S'   SrSrSrS\	R                  4S jrSS jrS	rg
)PatchTSTPreTrainedModeli(  r=   modelr   Fr   c                    [        U[        5      (       a  [        U R                  R                  U R                  R
                  5      U R                  R
                  -
  U R                  R                  -  S-   nU R                  R                  (       a-  [        R                  R                  UR                  SS9  US-  nUR                  U R                  U5      Ul        g[        U[        R                  5      (       aJ  UR                  R                   R#                  5         UR$                  R                   R'                  S5        g[        U[(        5      (       a^  UR*                  R                  R                   R#                  5         UR*                  R$                  R                   R'                  S5        g[        U[        R,                  5      (       ak  UR$                  R                   R                  SU R                  R.                  S9  UR                  b%  UR                  R                   R#                  5         ggg)z
Initialize weights
r   g{Gz?)std      ?rT   )meanr   N)r   PatchTSTPositionalEncodingr   r=   r   r   r   use_cls_tokenr   initnormal_	cls_token_init_peposition_encr   r;   datazero_weightfill_rs   r{   rE   init_std)rJ   r   r   s      r2   _init_weights%PatchTSTPreTrainedModel._init_weights/  s    f899 DKK..0H0HIDKKLdLdd))*,-.K {{(( 0 0d;q "(//$++{"KF--KK""$MM$$S) 122!!&&,,.##((..s3		**MM&&CT[[5I5I&J{{&  &&( ' +r4   c                 <    [        U[        5      (       a  X!l        g g r   )r   PatchTSTEncodergradient_checkpointing)rJ   r   r   s      r2   _set_gradient_checkpointing3PatchTSTPreTrainedModel._set_gradient_checkpointingI  s    f00,1) 1r4   r   N)F)re   rf   rg   rh   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointingr   Moduler  r  ro   r   r4   r2   r   r   (  s.    #O&+#)BII )42r4   r   c                   R   ^  \ rS rSrS\4U 4S jjrS\R                  4S jrSr	U =r
$ )PatchTSTEmbeddingiN  r=   c                   > [         TU ]  5         UR                  U l        UR                  U l        U R                  (       a1  [        R
                  " UR                  UR                  5      U l        g [        R                  " 5       U l        [        UR                  5       HG  nU R                  R                  [        R
                  " UR                  UR                  5      5        MI     g r   )rA   rB   r   share_embeddingr   rE   r   ry   input_embedding
ModuleListranger   )rJ   r=   r   rK   s      r2   rB   PatchTSTEmbedding.__init__O  s    "(";";%55#%99V-@-@&..#QD #%==?D 6445$$++BIIf6I6I6>>,Z[ 6r4   r   c                 h   UR                   S   nX R                  :w  a  [        SU R                   SU S35      eU R                  (       a  U R	                  U5      nU$ [        U5       Vs/ sH$  o@R                  U   " USS2USS2SS24   5      PM&     nn[        R                  " USS9nU$ s  snf )z
Parameters:
    patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
        Patch input for embedding
return:
    `torch.Tensor` of shape `(batch_size, num_channels, num_patches, d_model)`
r   z&The defined number of input channels (zQ) in the config has to be the same as the number of channels in the batch input (r   Nr#   )rU   r   rD   r  r  r  r(   stack)rJ   r   r   
embeddingsis        r2   rc   PatchTSTEmbedding.forward[  s     )..q1!8!8889P9P8Q RTTfSgghj  --k:J  UZZlTmnTmq..q1+aAqj2IJTmJnZQ7J os   ,*B/)r  r   r  re   rf   rg   rh   r   rB   r(   rm   rc   ro   rp   rq   s   @r2   r  r  N  s&    
\~ 
\5<<  r4   r  c                      ^  \ rS rSrSrS\S\4U 4S jjr\S\S\S\	R                  4S j5       rS\R                  4S	 jrS
rU =r$ )r  ir  z
Class for positional encoding
r=   r   c                   > [         TU ]  5         UR                  U l        UR                  U l        UR                  (       aA  [        R
                  " [        R                  " SSSUR                  5      5      U l	        US-  nU R                  X5      U l        UR                  S:  a&  [        R                  " UR                  5      U l        g [        R                  " 5       U l        g )Nr   r   )rA   rB   r  r   r   	Parameterr(   r   ry   r  r  r  positional_dropoutr   r   rJ   r=   r   rK   s      r2   rB   #PatchTSTPositionalEncoding.__init__w  s    #11"(";";\\%++aAv~~*NODN1K MM&> 6<5N5NQR5RBJJv001 	XZXcXcXe 	r4   rQ   c                 $   U R                   S:X  a5  [        R                  " [        R                  " XR
                  5      SS9nU$ U R                   S:X  Ga#  [        R                  " XR
                  5      n[        R                  " SU5      R                  S5      n[        R                  " [        R                  " SU R
                  S5      [        R                  " S5      U R
                  -  * -  5      n[        R                  " X4-  5      US S 2SS S24'   [        R                  " X4-  5      US S 2SS S24'   X"R                  5       -
  nX"R                  5       S	-  -  n[        R                  " US
S9nU$ [!        U R                    S35      e)Nr   Trequires_gradsincosr   r   r"   g     @
   FzN is not a valid positional encoder. Available types are 'random' and 'sincos'.)positional_encoding_typer   r)  r(   randnry   r   aranger   expmathlogsincosr   r   rD   )r=   r   r  positiondiv_terms        r2   r  #PatchTSTPositionalEncoding._init_pe  sX    **h6<<K(P`deL  ,,8 ;;{NNCL||A{3==a@Hyya!CQXHY\b\j\jHjFk!klH$)IIh.A$BLADqD!$)IIh.A$BLADqD!'*;*;*==L'+;+;+=+BCL<<EJL
  223  4B  C r4   r   c                 x   U R                   (       a  U R                  XR                  SS 2S S 24   -   5      nU R                  U R                  S S2S S 24   -   nUR	                  UR
                  S   U R                  SS5      n[        R                  " X14SS9nU$ U R                  XR                  -   5      nU$ )Nr   r   r    r"   r#   )	r  r*  r  r  expandrU   r   r(   cat)rJ   r   r  
cls_tokensr   s        r2   rc   "PatchTSTPositionalEncoding.forward  s    11+@Q@QRSRTVWRW@X2XYK):):2A2q5)AAI"))+*;*;A*>@W@WY[]_`J 99j%>AFL   22;ARAR3RSLr4   )r  r   r  r*  r  )re   rf   rg   rh   ri   r   rj   rB   staticmethodr   r)  r  r(   rm   rc   ro   rp   rq   s   @r2   r  r  r  s]    
~ 
C 
  c bll  &5<<  r4   r  c            	       z   ^  \ rS rSrSrS\S\4U 4S jjr  SS\R                  S\
\   S\
\   S	\4S
 jjrSrU =r$ )r  i  z
PatchTST Encoder
r=   r   c                 *  > [         TU ]  U5        SU l        [        U5      U l        [        X5      U l        [        R                  " [        UR                  5       Vs/ sH  n[        U5      PM     sn5      U l        U R                  5         g s  snf )NF)rA   rB   r  r  embedderr  positional_encoderr   r  r  num_hidden_layersr   layers	post_init)rJ   r=   r   r$  rK   s       r2   rB   PatchTSTEncoder.__init__  sx     &+# *&1"<V"Qmm5QWQiQiKj$kKja%9&%AKj$kl 	 %ls   Br   output_hidden_statesrP   rQ   c                 h   Ub  UOU R                   R                  nUb  UOU R                   R                  nU R                  U5      nU R	                  U5      nU(       a  SOSnU(       a  SOSnU R
                   H+  nU(       a  XT4-   nU" XCS9nUS   nU(       d  M#  XhS   4-   nM-     [        XEUS9$ )ar  
Parameters:
    patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
        Past values of the time series
    output_hidden_states (bool, optional): Indicates if hidden states should be outputted.
    output_attentions (bool, optional): Indicates if attentions should be outputted.

return:
    `BaseModelOutput`
Nr   )r   rP   r   r   )last_hidden_staterM   
attentions)r=   rP   rK  rE  rF  rH  r   )	rJ   r   rK  rP   r   encoder_statesall_attentionsencoder_layerlayer_outputss	            r2   rc   PatchTSTEncoder.forward  s      2C1N-TXT_T_TqTq$8$D $++JjJj 	
 mmK0..{;30d![[M#!//!A)|iM )+L  !/3C2E!E ) hvwwr4   )rE  r  rH  rF  NN)re   rf   rg   rh   ri   r   rj   rB   r(   rm   r   rl   r   rc   ro   rp   rq   s   @r2   r  r    se    ~ C " 04,0	(x\\(x 'tn(x $D>	(x
 
(x (xr4   r  zG
    Base class for model's outputs, with potential hidden states.
    )custom_introc                   >   \ rS rSr% SrSr\\R                     \	S'   Sr
\\\R                        \	S'   Sr\\\R                        \	S'   Sr\\R                     \	S'   Sr\\R                     \	S'   Sr\\R                     \	S	'   Sr\\R                     \	S
'   Srg)PatchTSTModelOutputi  a  
last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
    Sequence of hidden-states at the output of the last layer of the model.
hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
    Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
    one for the output of each layer) of shape `(batch_size, num_channels, height, width)`. Hidden-states of
    the model at the output of each layer plus the optional initial embedding outputs.
mask (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches)`, *optional*):
    Bool masked tensor indicating which patches are masked
loc (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
    Mean of the input data (batch_size, sequence_length, num_channels) over the sequence_length
scale (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
    Std of the input data (batch_size, sequence_length, num_channels) over the sequence_length
patch_input (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
    Patched input to the Transformer
NrM  rM   rN  r   locscaler   r   )re   rf   rg   rh   ri   rM  r   r(   FloatTensorr  rM   rn   rN  r   rX  rY  r   ro   r   r4   r2   rW  rW    s    " 6:x 1 1298<M8E%"3"345<59Ju00129(,D(5$$
%,'+C%##	$+)-E8E%%&-/3K%++,3r4   rW  z4
    Output type of [`PatchTSTForPretraining`].
    c                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	PatchTSTForPretrainingOutputi	  a
  
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    MSE loss.
prediction_output (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
    Prediction outputs of the time series modeling heads.
Nlossprediction_outputrM   rN  r   )re   rf   rg   rh   ri   r]  r   r(   rZ  r  r^  rM   rn   rN  ro   r   r4   r2   r\  r\  	  sh     )-D(5$$
%,59x 1 1298<M8E%"3"345<59Ju00129r4   r\  z3
    Output type of [`PatchTSTForRegression`].
    c                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	PatchTSTForRegressionOutputi  z
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    MSE loss.
regression_outputs (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
    Regression outputs of the time series modeling heads.
Nr]  regression_outputsrM   rN  r   )re   rf   rg   rh   ri   r]  r   r(   rZ  r  ra  rM   rn   rN  ro   r   r4   r2   r`  r`    sh     )-D(5$$
%,6:!2!23:8<M8E%"3"345<59Ju00129r4   r`  z3
    Output type of [`PatchTSTForPrediction`].
    c                      \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Sr\\R                     \	S'   Sr\\R                     \	S	'   S
rg)PatchTSTForPredictionOutputi1  a  
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    MSE loss.
prediction_outputs (`torch.FloatTensor` of shape `(batch_size, prediction_length, -1)`):
    Prediction outputs of the time series modeling heads.
attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
    Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
    sequence_length)`.

    Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
    heads.
loc: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
    Mean of the input data (batch_size, sequence_length, num_channels) over the sequence_length
scale: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
    Std of the input data (batch_size, sequence_length, num_channels) over the sequence_length
Nr]  prediction_outputsrM   rN  rX  rY  r   )re   rf   rg   rh   ri   r]  r   r(   rZ  r  rd  rM   rn   rN  rX  rY  ro   r   r4   r2   rc  rc  1  s    " )-D(5$$
%,6:!2!23:8<M8E%"3"345<59Ju00129'+C%##	$+)-E8E%%&-r4   rc  z7
    Output type of [`PatchTSTForClassification`].
    c                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	PatchTSTForClassificationOutputiQ  as  
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    Total loss as the sum of the masked language modeling loss and the next sequence prediction
    (classification) loss.
prediction_logits (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
    Prediction scores of the PatchTST modeling head (scores before SoftMax).
Nr]  prediction_logitsrM   rN  r   )re   rf   rg   rh   ri   r]  r   r(   rZ  r  rg  rM   rn   rN  ro   r   r4   r2   rf  rf  Q  sh     )-D(5$$
%,59x 1 1298<M8E%"3"345<59Ju00129r4   rf  z
    Base class for time series model's predictions outputs that contains the sampled values from the chosen
    distribution.
    c                   B    \ rS rSr% SrSr\\R                     \	S'   Sr
g)SamplePatchTSTOutputif  z
sequences (`torch.FloatTensor` of shape `(batch_size, num_samples, prediction_length, num_targets)`):
    Sampled values from the chosen distribution.
N	sequencesr   )re   rf   rg   rh   ri   rj  r   r(   rZ  r  ro   r   r4   r2   ri  ri  f  s    
 .2Ix))*1r4   ri  inputtargetrQ   c                 &    U R                  U5      * $ )z[
Computes the negative log likelihood loss from input distribution with respect to target.
)log_prob)rk  rl  s     r2   nllro  w  s     NN6"""r4   input_tensorweightsc                 R   Ub  [         R                  " US:g  X-  [         R                  " U 5      5      n[         R                  " U(       a  UR	                  US9OUR	                  5       SS9nU(       a  UR	                  US9U-  $ UR	                  5       U-  $ U R                  US9$ )a:  
Computes the weighted average of a given tensor across a given `dim`, masking values associated with weight zero,
meaning instead of `nan * 0 = nan` you will get `0 * 0 = 0`.

Args:
    input_tensor (`torch.FloatTensor`):
        Input tensor, of which the average must be computed.
    weights (`torch.FloatTensor`, *optional*):
        Weights tensor, of the same shape as `input_tensor`.
    dim (`int`, *optional*):
        The dim along which to average `input_tensor`.

Returns:
    `torch.FloatTensor`: The tensor with values averaged along the specified `dim`.
r   r#   r   min)r(   where
zeros_likeclampr   r   )rp  rq  r$   weighted_tensorsum_weightss        r2   weighted_averagerz    s      ++glL4JEL\L\]iLjkkk#'++#+"67;;=VYZ03###,R]]]9L9L9NR]]]  S ))r4   c            	          ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  S\	\R                  \R                  \R                  4   4S jr
S	rU =r$ )
PatchTSTStdScaleri  z
Standardize features by calculating the mean and scaling along the first dimension, and then normalizes it by
subtracting from the mean and dividing by the standard deviation.
r=   c                   > [         TU ]  5         [        US5      (       a  UR                  OSU l        [        US5      (       a  UR
                  OSU l        [        US5      (       a  UR                  U l        g SU l        g )Nscaling_dimr   keepdimTminimum_scalegh㈵>)rA   rB   hasattrr~  r$   r  r  r|   s     r2   rB   PatchTSTStdScaler.__init__  sd    )0)G)G6%%Q)0)C)Cv~~5<V_5U5UV11[_r4   r  observed_indicatorrQ   c                 r   UR                  U R                  U R                  S9nUR                  S5      nX-  R                  U R                  U R                  S9U-  nX-
  U-  S-  R                  U R                  U R                  S9U-  n[        R
                  " XPR                  -   5      nX-
  U-  XF4$ )  
Parameters:
    data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        input for Batch norm calculation
    observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Calculating the scale on the observed indicator.
Returns:
    tuple of `torch.Tensor` of shapes
        (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
        `(batch_size, 1, num_input_channels)`)
r  r   r"   )r   r$   r  	clamp_minr(   sqrtr  )rJ   r  r  denominatorrX  variancerY  s          r2   rc   PatchTSTStdScaler.forward  s     ),,TXXt||,L!++C0(--dhh-MP[[j$661<AA$((TXT`T`Aadoo

8&8&889
e#S//r4   )r$   r  r  re   rf   rg   rh   ri   r   rB   r(   rm   rn   rc   ro   rp   rq   s   @r2   r|  r|    sX    
`~ `0LL06;ll0	u||U\\5<<7	80 0r4   r|  c            	          ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  S\	\R                  \R                  \R                  4   4S jr
S	rU =r$ )
PatchTSTMeanScaleri  z~
Computes a scaling factor as the weighted average absolute value along the first dimension, and scales the data
accordingly.
r=   c                 N  > [         TU ]  5         [        US5      (       a  UR                  OSU l        [        US5      (       a  UR
                  OSU l        [        US5      (       a  UR                  OSU l        [        US5      (       a  UR                  U l        g S U l        g )Nr~  r   r  Tr  绽|=default_scale)rA   rB   r  r~  r$   r  r  r  r|   s     r2   rB   PatchTSTMeanScaler.__init__  s    )0)G)G6%%Q)0)C)Cv~~5<V_5U5UV11[`5<V_5U5UV11[_r4   r  r  rQ   c                    X-  R                  5       R                  U R                  SS9nUR                  U R                  SS9nU[        R                  " USS9-  nU R
                  cL  UR                  SS9n[        R                  " UR                  S5      SS9n[        R                  " Xg-  5      nO#U R
                  [        R                  " U5      -  n[        R                  " US:  XX5      n[        R                  " XPR                  S9nX-  n	U R                  (       d  UR                  U R                  S9nU	[        R                  " U5      U4$ )r  Tr  r   rs  r   r#   )absr   r$   r(   rw  r  squeeze	ones_likeru  r  r  rv  )
rJ   r  r  ts_sumnum_observedrY  	batch_sumbatch_observationsr  scaled_datas
             r2   rc   PatchTSTMeanScaler.forward  s"    +00266txx6N)--dhh-E\q99 %

q
)I!&\-=-=a-@a!H!MM)*HIM ..1GGM L1,eC E'9'9:l||MMdhhM/EE,,U3U::r4   )r  r$   r  r  r  rq   s   @r2   r  r    sX    
`~ `&;LL&;6;ll&;	u||U\\5<<7	8&; &;r4   r  c            
          ^  \ rS rSrSrS\4U 4S jjr S
S\R                  S\	\R                     S\
\R                  \R                  \R                  4   4S jjrS	rU =r$ )PatchTSTNOPScaleri  zt
Assigns a scaling factor equal to 1 along the first dimension, and therefore applies no scaling to the input data.
r=   c                    > [         TU ]  5         [        US5      (       a  UR                  OSU l        [        US5      (       a  UR
                  U l        g SU l        g )Nr~  r   r  T)rA   rB   r  r~  r$   r  r|   s     r2   rB   PatchTSTNOPScaler.__init__  sF    )0)G)G6%%Q)0)C)Cv~~r4   r  r  rQ   c                     [         R                  " USS9R                  U R                  U R                  S9n[         R
                  " USS9R                  U R                  U R                  S9nXU4$ )aP  
Parameters:
    data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        input for Batch norm calculation
Returns:
    tuple of `torch.Tensor` of shapes
        (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
        `(batch_size, 1, num_input_channels)`)
Fr.  r$   r  )r(   r  r   r$   r  rv  )rJ   r  r  rY  rX  s        r2   rc   PatchTSTNOPScaler.forward  sg     E:??DHHVZVbVb?ct59>>488UYUaUa>b%r4   r  r   )re   rf   rg   rh   ri   r   rB   r(   rm   r   rn   rc   ro   rp   rq   s   @r2   r  r    sd    N~ N PT LL 6>u||6L 	u||U\\5<<7	8   r4   r  c            	          ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  S\\R                  \R                  \R                  4   4S jr	Sr
U =r$ )	PatchTSTScaleri  r=   c                    > [         TU ]  5         UR                  S:X  d  UR                  SL a  [        U5      U l        g UR                  S:X  a  [        U5      U l        g [        U5      U l        g )Nr   Tr   )rA   rB   r   r  scalerr|  r  r|   s     r2   rB   PatchTSTScaler.__init__  sU    >>V#v~~'=,V4DK^^u$+F3DK+F3DKr4   r  r  rQ   c                 2    U R                  X5      u  pnXU4$ )a  
Parameters:
    data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Input for scaler calculation
    observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Calculating the scale on the observed indicator.
Returns:
    tuple of `torch.Tensor` of shapes
        (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
        `(batch_size, 1, um_input_channels)`)
r  )rJ   r  r  rX  rY  s        r2   rc   PatchTSTScaler.forward  s"      ;;t@5%r4   r  )re   rf   rg   rh   r   rB   r(   rm   rn   rc   ro   rp   rq   s   @r2   r  r    sQ    4~ 4 LL 6;ll 	u||U\\5<<7	8   r4   r  c                      ^  \ rS rSrS\4U 4S jjr     SS\R                  S\\R                     S\\R                     S\\	   S\\	   S	\\	   S
\
\\4   4S jjrSrU =r$ )PatchTSTModeli*  r=   c                 f  > [         TU ]  U5        [        U5      U l        [	        U5      U l        UR                  U l        U R
                  R                  nU R                  (       a  [        U5      U l	        O[        R                  " 5       U l	        [        XS9U l        U R                  5         g )N)r   )rA   rB   r  r  r   
patchifierdo_mask_inputr   r   maskingr   r   r  encoderrI  r+  s      r2   rB   PatchTSTModel.__init__,  s     $V,*62#11oo11*62DL;;=DL&vG 	r4   r   past_observed_maskfuture_valuesrK  rP   return_dictrQ   c           
         Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [        R
                  " U5      nU R                  X5      u  pxn	U R                  U5      n
U R                  (       a  U R                  U
5      u  pOU R                  U
5      SpU R                  XUS9nU(       d<  UR                  UR                  UR                  4nXXU
4-   n[        S U 5       5      $ [        UR                  UR                  UR                  UUU	U
S9$ )a  
Parameters:
    past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
        Input sequence to the model
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
    future_values (`torch.BoolTensor` of shape `(batch_size, prediction_length, num_input_channels)`, *optional*):
        Future target values associated with the `past_values`
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers
    output_attentions (`bool`, *optional*):
        Whether or not to return the output attention of all layers
    return_dict (`bool`, *optional*):
        Whether or not to return a `ModelOutput` instead of a plain tuple.

Returns:
    `PatchTSTModelOutput` or tuple of `torch.Tensor` (if `return_dict`=False or `config.return_dict`=False)

Examples:

```python
>>> from huggingface_hub import hf_hub_download
>>> import torch
>>> from transformers import PatchTSTModel

>>> file = hf_hub_download(
...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
... )
>>> batch = torch.load(file)

>>> model = PatchTSTModel.from_pretrained("namctin/patchtst_etth1_pretrain")

>>> # during training, one provides both past and future values
>>> outputs = model(
...     past_values=batch["past_values"],
...     future_values=batch["future_values"],
... )

>>> last_hidden_state = outputs.last_hidden_state
```N)r   rK  rP   c              3   ,   #    U H  oc  M  Uv   M     g 7fr   r   ).0vs     r2   	<genexpr>(PatchTSTModel.forward.<locals>.<genexpr>  s     =GqGs   	)rM  rM   rN  r   rX  rY  r   )r=   use_return_dictrP   rK  r(   r  r  r  r  r  r  rM  rM   rN  rn   rW  )rJ   r   r  r  rK  rP   r  scaled_past_valuesrX  rY  patched_valuesmasked_valuesr   encoder_outputr   s                  r2   rc   PatchTSTModel.forward>  sK   l &1%<k$++B]B]1B1N-TXT_T_TqTq$8$D $++JjJj 	 %!&!= *.[)U& );<"&,,~">M4"&,,~">4%du & 
 %779U9UWeWpWpqGs> BBG=G===",>>(66%00&
 	
r4   )r  r  r  r  r  NNNNN)re   rf   rg   rh   r   rB   r(   rm   r   rl   r   rn   rW  rc   ro   rp   rq   s   @r2   r  r  *  s    ~ * 6:04/3,0&*Z
\\Z
 %U\\2Z
  -	Z

 'tnZ
 $D>Z
 d^Z
 
u))	*Z
 Z
r4   r  c                   n   ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	PatchTSTMaskPretrainHeadi  z%
Pretraining head for mask modelling
r=   c                 8  > [         TU ]  5         UR                  S:  a   [        R                  " UR                  5      O[        R
                  " 5       U l        [        R                  " UR                  UR                  5      U l
        UR                  U l        g Nr   )rA   rB   head_dropoutr   r   r   r   rE   ry   r   linearr  r|   s     r2   rB   !PatchTSTMaskPretrainHead.__init__  sh    :@:M:MPQ:Qrzz&"5"56WYWbWbWdii0C0CD#11r4   	embeddingrQ   c                     U R                  U R                  U5      5      nU R                  (       a  USS2SS2SS2SS24   nU$ )a  
Parameters:
    embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
            `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
        Embedding from the model
Returns:
    `torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                    `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True

Nr   )r  r   r  )rJ   r  s     r2   rc    PatchTSTMaskPretrainHead.forward  s>     KKY 78	!!QA+.Ir4   )r   r  r  r   rq   s   @r2   r  r    s4    2~ 2 %,,  r4   r  z*
    The PatchTST for pretrain model.
    c                      ^  \ rS rSrS\4U 4S jjr    SS\R                  S\\R                     S\\	   S\\	   S\\	   S	\
\\4   4S
 jjrSrU =r$ )PatchTSTForPretrainingi  r=   c                    > [         TU ]  U5        SUl        [        US9U l        [        U5      U l        U R                  5         g )NT)r=   )rA   rB   r  r  r   r  headrI  r|   s     r2   rB   PatchTSTForPretraining.__init__  s<     #"&1
,V4	 	r4   r   r  rK  rP   r  rQ   c                    Ub  UOU R                   R                  nU R                  UUUUSS9nU R                  UR                  5      n[
        R                  " SS9nU" XvR                  5      n	U	R                  SS9UR                  -  R                  5       UR                  R                  5       S-   -  n
UR                  nU(       d  U4USS	 -   nU
b  U
4U-   nU$ UnU$ [        XXR                  S
9$ )a  
Parameters:
    past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
        Input sequence to the model
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers
    output_attentions (`bool`, *optional*):
        Whether or not to return the output attention of all layers
    return_dict (`bool`, *optional*): Whether or not to return a `ModelOutput` instead of a plain tuple.

Returns:
    `PatchTSTForPretrainingOutput` or tuple of `torch.Tensor` (if `return_dict`=False or
    `config.return_dict`=False)

Examples:

```python
>>> from huggingface_hub import hf_hub_download
>>> import torch
>>> from transformers import PatchTSTConfig, PatchTSTForPretraining

>>> file = hf_hub_download(
...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
... )
>>> batch = torch.load(file)

>>> # Config for random mask pretraining
>>> config = PatchTSTConfig(
...     num_input_channels=7,
...     context_length=512,
...     patch_length=12,
...     stride=12,
...     mask_type='random',
...     random_mask_ratio=0.4,
...     use_cls_token=True,
... )
>>> # Config for forecast mask pretraining
>>> config = PatchTSTConfig(
...     num_input_channels=7,
...     context_length=512,
...     patch_length=12,
...     stride=12,
...     mask_type='forecast',
...     num_forecast_mask_patches=5,
...     use_cls_token=True,
... )
>>> model = PatchTSTForPretraining(config)

>>> # during training, one provides both past and future values
>>> outputs = model(past_values=batch["past_values"])

>>> loss = outputs.loss
>>> loss.backward()
```Tr   r  rK  rP   r  none	reductionr    r#   r  r   )r]  r^  rM   rN  )r=   r  r   r  rM  r   MSELossr   r   r   r   rM   r\  rN  )rJ   r   r  rK  rP   r  model_outputx_hatr]  loss_valmasked_lossrO  r   s                r2   rc   PatchTSTForPretraining.forward  s   J &1%<k$++B]B] zz#1!5/ " 
 		,889 zzF+778}}},|/@/@@EEG<K\K\K`K`KbejKjk%33ha!33G2=2I{nw.GN PWGN+^`w`w
 	
r4   r  r   )NNNN)re   rf   rg   rh   r   rB   r(   rm   r   rl   r   rn   r\  rc   ro   rp   rq   s   @r2   r  r    s    ~  6:/3,0&*a
\\a
 %U\\2a
 'tn	a

 $D>a
 d^a
 
u22	3a
 a
r4   r  c                   R   ^  \ rS rSrS\4U 4S jjrS\R                  4S jrSr	U =r
$ )PatchTSTClassificationHeadi+  r=   c                   > [         TU ]  5         UR                  U l        UR                  U l        [        R
                  " SS9U l        UR                  S:  a   [        R                  " UR                  5      O[        R                  " 5       U l
        [        R                  " UR                  UR                  -  UR                  5      U l        g Nr   	start_dimr   )rA   rB   r  pooling_typer   Flattenflattenr  r   r   r   rE   r   ry   num_targetsr  r|   s     r2   rB   #PatchTSTClassificationHead.__init__,  s    #11"//zzA.:@:M:MPQ:Qrzz&"5"56WYWbWbWdii 9 9FNN JFL^L^_r4   r  c                 p   U R                   (       a  USS2SS2SSS24   nOcU R                  S:X  a  UR                  SS9nOCU R                  S:X  a  UR                  SS9R                  nO[        SU R                   S35      eU R                  U5      nU R                  U R                  U5      5      nU$ )	a#  
Parameters:
    embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
             `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
        Embedding from the model
Returns:
    `torch.Tensor` of shape `(bs, num_targets)`

Nr   r   r"   r#   r   pooling operator  is not implemented yet)	r  r  r   r   valuesrD   r  r  r   rJ   r  pooled_embeddingr   s       r2   rc   "PatchTSTClassificationHead.forward4  s     (Aq!4&((~~!~4%'(}}}3::01B1B0CCZ[\\<<(89T\\*:;<r4   )r   r  r  r  r  r&  rq   s   @r2   r  r  +  s&    `~ `  r4   r  z0
    The PatchTST for classification model.
    c                      ^  \ rS rSrS\4U 4S jjr\     SS\R                  S\	\R                     S\	\
   S\	\
   S\	\
   S	\	\
   S
\\\4   4S jj5       rSrU =r$ )PatchTSTForClassificationiP  r=   c                    > [         TU ]  U5        UR                  (       a  [        R	                  S5        SUl        [        U5      U l        [        U5      U l        U R                  5         g )N+Setting `do_mask_input` parameter to False.F)
rA   rB   r  loggerwarningr  r   r  r  rI  r|   s     r2   rB   "PatchTSTForClassification.__init__V  sT      NNHI#(F "6*
.v6	 	r4   r   target_valuesr  rK  rP   r  rQ   c                 V   Ub  UOU R                   R                  nU R                  UUUUSS9nU R                  UR                  5      nSn	Ub  [
        R                  " 5       n
U
" X5      n	U(       d  U4USS -   nU	b  U	4U-   nU$ UnU$ [        U	UUR                  UR                  S9$ )a  
past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
    Input sequence to the model
target_values (`torch.Tensor`, *optional*):
    Labels associates with the `past_values`
past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
    Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
    in `[0, 1]`:

    - 1 for values that are **observed**,
    - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

Examples:

```python
>>> from transformers import PatchTSTConfig, PatchTSTForClassification

>>> # classification task with two input channel2 and 3 classes
>>> config = PatchTSTConfig(
...     num_input_channels=2,
...     num_targets=3,
...     context_length=512,
...     patch_length=12,
...     stride=12,
...     use_cls_token=True,
... )
>>> model = PatchTSTForClassification(config=config)

>>> # during inference, one only provides past values
>>> past_values = torch.randn(20, 512, 2)
>>> outputs = model(past_values=past_values)
>>> labels = outputs.prediction_logits
```NTr  r   r   )r]  rg  rM   rN  )
r=   r  r   r  rM  r   CrossEntropyLossrf  rM   rN  )rJ   r   r  r  rK  rP   r  r  y_hatr  r]  r   s               r2   rc   !PatchTSTForClassification.forwardd  s    X &1%<k$++B]B]zz#1!5/ " 
 		,889$&&(DE1Hha!33G/7/CxkG+GN JQGN.#&44#..	
 	
r4   r  r  )re   rf   rg   rh   r   rB   r   r(   rm   r   rl   r   rn   rf  rc   ro   rp   rq   s   @r2   r  r  P  s    ~   15-1/3,0&*D
\\D
  -D
 %TN	D

 'tnD
 $D>D
 d^D
 
u55	6D
 D
r4   r  z,
    The PatchTST for regression Model.
    c                   Z   ^  \ rS rSrSS\S\4U 4S jjjrS\R                  4S jr	Sr
U =r$ )	PatchTSTPredictionHeadi  r=   r   c                 H  > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        UR
                  U l        U R
                  (       d  U R                  (       a  UR                  nOUR                  U-  nU R                  (       Gd]  [        R                  " 5       U l	        [        R                  " 5       U l
        [        R                  " 5       U l        [        U R                  5       H  nU R                  R                  [        R                  " SS95        Uc:  U R                  R                  [        R                  " XAR                   5      5        O*U R                  R                  UR#                  U5      5        U R                  R                  UR$                  S:  a   [        R&                  " UR$                  5      O[        R(                  " 5       5        M     g[        R                  " SS9U l        Uc&  [        R                  " XAR                   5      U l        OUR#                  U5      U l        UR$                  S:  a   [        R&                  " UR$                  5      O[        R(                  " 5       U l        g)z
num_patches (`int`):
    The number of patches in the input sequence.
distribution_output (`DistributionOutput`, *optional*):
    The distribution output layer for probabilistic forecasting. If None, a linear output layer is used.
r"   r  Nr   )rA   rB   share_projectionr   r  r  ry   r   r  projectionsdropoutsflattensr  r   r  rE   prediction_lengthget_parameter_projectionr  r   r   r  
projectionr   )rJ   r=   r   distribution_outputrC   r$  rK   s         r2   rB   PatchTSTPredictionHead.__init__  s    	 & 7 7"(";";#11"// 2 2~~H~~3H$$$!}}DMMODMMMODM4223$$RZZ!%<=&.$$++BIIh@X@X,YZ $$++,?,X,XYa,bc$$H[H[^_H_RZZ0C0C%Degepepers 4 ::2DL"*"$))H6N6N"O #6"N"Nx"X>D>Q>QTU>U2::f&9&9:[][f[f[hDLr4   r  c                    U R                   (       a  USS2SS2SSS24   nOLU R                  S:X  a  UR                  SS9nO,U R                  S:X  a  UR                  SS9R                  nOUnU R
                  (       d  / n[        U R                  5       H]  nU R                  U   " USS2USS24   5      nU R                  U   " U5      nU R                  U   " U5      nUR                  U5        M_     [        R                  " USS9nO3U R                  U5      nU R                  U5      nU R!                  U5      n[#        U[$        5      (       a  [%        S U 5       5      nU$ UR'                  SS5      nU$ )	a2  
Parameters:
    embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
             `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
        Embedding from the model
Returns:
    `torch.Tensor` of shape `(bs, forecast_len, num_channels)`

Nr   r   r"   r#   r   r   c              3   B   #    U H  oR                  S S5      v   M     g7f)r"   r   N)r*   )r  zs     r2   r  1PatchTSTPredictionHead.forward.<locals>.<genexpr>  s     =f;;q!,,fs   )r  r  r   r   r  r  r  r   r  r  r  r   r(   r"  r  r   r  r   rn   r*   )rJ   r  r  r   r$  s        r2   rc   PatchTSTPredictionHead.forward  sl    (Aq!4  F*#,>>a>#8 ""e+#,==Q=#7#>#>  $- $$F4223#'==#34DQ1W4M#N #'==#34D#E  $(#3#3A#67G#H ./ 4 [[Q/F  $||,<=#||,<= __%56Ffe$$=f==F  %%a+Fr4   )
r   r  r  r  r   r  r  r  r  r  r   )re   rf   rg   rh   r   rj   rB   r(   rm   rc   ro   rp   rq   s   @r2   r  r    s5    )i~ )iC )i )iV1 1 1r4   r  z,
    The PatchTST for prediction model.
    c                   :  ^  \ rS rSrS\4U 4S jjr     SS\R                  S\\R                     S\\R                     S\\	   S\\	   S	\\	   S
\
\\4   4S jjr\R                  " 5        SS\R                  S\\R                     S
\4S jj5       rSrU =r$ )PatchTSTForPredictioni  r=   c                   > [         TU ]  U5        UR                  (       a  [        R	                  S5        SUl        [        U5      U l        UR                  S:X  a  S U l        OUR                  S:X  a  [        UR                  S9U l        OjUR                  S:X  a  [        UR                  S9U l        OAUR                  S:X  a  [        UR                  S9U l        O[        SUR                   35      e[        XR                  R                  R                   U R                  S	9U l        U R%                  5         g )
Nr  Fmse	student_tr#   normalnegative_binomialUnknown distribution output )r  )rA   rB   r  r  r  r  r   r]  r  r   r  r   r   rD   r  r  r   r  rI  r|   s     r2   rB   PatchTSTForPrediction.__init__  s     NNHI#(F "6*
;;%'+D$))[8+9f>V>V+W(++x7+7F<T<T+U(++/BB+AfF^F^+_( #?@Z@Z?[!\]]*JJ))554KcKc
	
 	r4   r   r  r  rK  rP   r  rQ   c           	         Ub  UOU R                   R                  nU R                  UUUUSS9nU R                  UR                  5      nSn	U R
                  (       a  Un
OXR                  -  UR                  -   n
Ubr  U R
                  (       aE  U R
                  R                  XR                  UR                  S9n[        X5      n	[        U	5      n	O[        R                  " SS9nU" X5      n	UR                  nUR                  nU(       d  U
4USS -   nU	b  U	4U-   nU$ UnU$ [        U	U
UR                  UR                  UUS	9$ )
a  
Parameters:
    past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
        Input sequence to the model
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
    future_values (`torch.Tensor` of shape `(bs, forecast_len, num_input_channels)`, *optional*):
        Future target values associated with the `past_values`
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers
    output_attentions (`bool`, *optional*):
        Whether or not to return the output attention of all layers
    return_dict (`bool`, *optional*):
        Whether or not to return a `ModelOutput` instead of a plain tuple.

Returns:
    `PatchTSTForPredictionOutput` or tuple of `torch.Tensor` (if `return_dict`=False or
    `config.return_dict`=False)

Examples:

```python
>>> from huggingface_hub import hf_hub_download
>>> import torch
>>> from transformers import PatchTSTConfig, PatchTSTForPrediction

>>> file = hf_hub_download(
...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
... )
>>> batch = torch.load(file)

>>> # Prediction task with 7 input channels and prediction length is 96
>>> model = PatchTSTForPrediction.from_pretrained("namctin/patchtst_etth1_forecast")

>>> # during training, one provides both past and future values
>>> outputs = model(
...     past_values=batch["past_values"],
...     future_values=batch["future_values"],
... )

>>> loss = outputs.loss
>>> loss.backward()

>>> # during inference, one only provides past values, the model outputs future values
>>> outputs = model(past_values=batch["past_values"])
>>> prediction_outputs = outputs.prediction_outputs
```NTr  rX  rY  r   r  r   r    )r]  rd  rM   rN  rX  rY  )r=   r  r   r  rM  r  rY  rX  distributionro  rz  r   r  rc  rM   rN  )rJ   r   r  r  rK  rP   r  r  r  r  	y_hat_outr  r]  rX  rY  r   s                   r2   rc   PatchTSTForPrediction.forward4  sf   z &1%<k$++B]B] zz#1!5/ " 
 		,889##I 2 22\5E5EEI$''#77DD//|7I7I  E   |;+H5zzF3	9"" l\!B%77G/7/CxkG+GN JQGN*(&44#..
 	
r4   c                    U R                   R                  nU " USUSS9nU R                  (       av  U R                  R                  UR                  UR
                  UR                  S9n[        U5       Vs/ sH  oeR                  5       PM     nn[        R                  " USS9nOUR                  R                  S5      n[        US9$ s  snf )a  
Generate sequences of sample predictions from a model with a probability distribution head.

Parameters:
    past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Past values of the time series that serves as context in order to predict the future.
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

Return:
    [`SamplePatchTSTOutput`] where the outputs `sequences` tensor will have shape `(batch_size, number of
    samples, prediction_length, 1)` or `(batch_size, number of samples, prediction_length, num_input_channels)`
    for multivariate predictions.
NF)r   r  r  rK  r  r   r#   rj  )r=   num_parallel_samplesr  r  rd  rX  rY  r  sampler(   r"  r   ri  rJ   r   r  r  r   r  r   sampless           r2   generatePatchTSTForPrediction.generate  s    2  ${{?? #1!&	
 ##33@@**7== A L 7<<P6QR6Q**,6QGRkk'q1G00::1=G#g66 Ss   7Cr  r  r   r  r   )re   rf   rg   rh   r   rB   r(   rm   r   rl   r   rn   rc  rc   no_gradri  r#  ro   rp   rq   s   @r2   r  r    s    ~ @ 6:04/3,0&*k
\\k
 %U\\2k
  -	k

 'tnk
 $D>k
 d^k
 
u11	2k
Z ]]_ 6:-7\\-7 %U\\2-7 
	-7 -7r4   r  c                   Z   ^  \ rS rSrSrSS\4U 4S jjjrS\R                  4S jr	Sr
U =r$ )	PatchTSTRegressionHeadi  z
Regression head
r=   c                 
  > [         TU ]  5         UR                  U l        UR                  U l        UR
                  U l        X l        UR                  UR                  -  n[        R                  " SS9U l        UR                  S:  a   [        R                  " UR                  5      O[        R                  " 5       U l        Uc&  [        R                   " X1R"                  5      U l        g UR'                  U5      U l        g r  )rA   rB   output_rangey_ranger  r  r  r   ry   r   r  r  r  r   r   r   rE   r  r  r  )rJ   r=   r  rC   rK   s       r2   rB   PatchTSTRegressionHead.__init__  s    **#11"//#6 ,,v~~=zzA.:@:M:MPQ:Qrzz&"5"56WYWbWbWd& ii2D2DEDO1JJ8TDOr4   r  c                 @   U R                   (       a  USS2SS2SSS24   nOcU R                  S:X  a  UR                  SS9nOCU R                  S:X  a  UR                  SS9R                  nO[        SU R                   S35      eU R                  U R                  U5      5      nU R                  U5      nU R                  SL U R                  SL-  (       aF  [        R                  " U5      U R                  S	   U R                  S   -
  -  U R                  S   -   nU$ )
a!  
Parameters:
    embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
            `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
        Embedding from the model
Returns:
    `torch.Tensor` of shape `(bs, output_dim)`

Nr   r   r"   r#   r   r  r  r   )r  r  r   r   r  rD   r   r  r  r  r+  r(   sigmoidr  s       r2   rc   PatchTSTRegressionHead.forward  s    (Aq!4&((~~!~4%'(}}}3::01B1B0CCZ[\\  <<5E(FG !12$$,T1IJ]]6*dll1oQ.OPSWS_S_`aSbbFr4   )r  r   r  r  r  r  r+  r   r   rq   s   @r2   r(  r(    s1    U~ U U"  r4   r(  z,
    The PatchTST for regression model.
    c                   D  ^  \ rS rSrS\4U 4S jjr\     SS\R                  S\	\R                     S\	\R                     S\	\
   S\	\
   S	\	\
   S
\\\4   4S jj5       r\R                  " 5        SS\R                  S\	\R                     S
\4S jj5       rSrU =r$ )PatchTSTForRegressioni	  r=   c                 H  > [         TU ]  U5        UR                  (       a  [        R	                  S5        SUl        [        U5      U l        UR                  S:X  a  S U l        OUR                  S:X  a  [        UR                  S9U l        OjUR                  S:X  a  [        UR                  S9U l        OAUR                  S:X  a  [        UR                  S9U l        O[        SUR                   35      e[        XR                  5      U l        U R!                  5         g )	Nr  Fr  r  r#   r  r  r  )rA   rB   r  r  r  r  r   r]  r  r   r  r   r   rD   r(  r  rI  r|   s     r2   rB   PatchTSTForRegression.__init__  s      NNHI#(F "6*
;;%'+D$))[8+9f>P>P+Q(++x7+7F<N<N+O(++/BB+AfFXFX+Y( #?@Z@Z?[!\]]*63K3KL	 	r4   r   r  r  rK  rP   r  rQ   c           	      `   Ub  UOU R                   R                  nU R                  UUUUSS9nU R                  UR                  5      nSn	Ub  U R
                  (       ap  U R
                  R                  U5      n
[        U Vs/ sH(  oR                  SU R                   R                  5      PM*     sn5      n[        X5      n	[        U	5      n	O[        R                  " SS9n	U	" X5      n	U(       d  U4USS -   nU	b  U	4U-   nU$ UnU$ [        U	UUR                  UR                   S	9$ s  snf )
a  
past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
    Input sequence to the model
target_values (`torch.Tensor` of shape `(bs, num_input_channels)`):
    Target values associates with the `past_values`
past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
    Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
    in `[0, 1]`:

    - 1 for values that are **observed**,
    - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
    Whether or not to return a `ModelOutput` instead of a plain tuple.

Examples:

```python
>>> from transformers import PatchTSTConfig, PatchTSTForRegression

>>> # Regression task with 6 input channels and regress 2 targets
>>> model = PatchTSTForRegression.from_pretrained("namctin/patchtst_etth1_regression")

>>> # during inference, one only provides past values, the model outputs future values
>>> past_values = torch.randn(20, 512, 6)
>>> outputs = model(past_values=past_values)
>>> regression_outputs = outputs.regression_outputs
```NTr  r    r   r  r   r   )r]  ra  rM   rN  )r=   r  r   r  rM  r  r  rn   r-   r  ro  rz  r   r  r`  rM   rN  )rJ   r   r  r  rK  rP   r  r  r  r]  r  itemr   s                r2   rc   PatchTSTForRegression.forward)  s=   J &1%<k$++B]B]zz#1!5/ " 
 		,889$''#77DDUKRWXRW$yyT[[-D-DERWXY<7'-zzF3E1ha!33G+/+;tg'GN BIGN*$&44#..	
 	
 Ys   .D+c                 f   U R                   R                  nU " USUSS9nU R                  R                  UR                  5      n[        U5       Vs/ sH  oeR                  5       PM     nn[        R                  " USS9R                  SX0R                   R                  5      n[        US9$ s  snf )a:  
Generate sequences of sample predictions from a model with a probability distribution head.

Parameters:
    past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
        Past values of the time series that serves as context in order to predict the future.
    past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
        Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
        in `[0, 1]`:

        - 1 for values that are **observed**,
        - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

Return:
    [`SamplePatchTSTOutput`] where the outputs `sequences` tensor will have shape `(batch_size, number of
    samples, num_targets)`.
NF)r   r  r  rK  r   r#   r    r  )r=   r  r  r  ra  r  r   r(   r"  r-   r  ri  r!  s           r2   r#  PatchTSTForRegression.generates  s    0  ${{?? #1!&	
 //<<W=W=WX278L2MN2MQ&&(2MN++g1-2227K[[MdMde#g66 Os   B.r%  r  r   )re   rf   rg   rh   r   rB   r   r(   rm   r   rl   r   rn   r`  rc   r&  ri  r#  ro   rp   rq   s   @r2   r1  r1  	  s    ~ 4  1559/3,0&*G
\\G
  -G
 %U\\2	G

 'tnG
 $D>G
 d^G
 
u11	2G
 G
R ]]_ 6:'7\\'7 %U\\2'7 
	'7 '7r4   r1  )r  r   r  r  r1  r  )NrT   N)NFr   r  rT  )Lri   r6  dataclassesr   typingr   r   r   r(   r   activationsr	   modeling_flash_attention_utilsr
   modeling_outputsr   modeling_utilsr   r   processing_utilsr   time_series_utilsr   r   r   utilsr   r   r   configuration_patchtstr   
get_loggerre   r  r  rm   rk   r3   r6   rs   listrl   rj   r   r   r   r   r   r   r  r  r  rW  r\  r`  rc  rf  ri  distributionsDistributionro  rz  r|  r  r  r  r  r  r  r  r  r  r  r(  r1  __all__r   r4   r2   <module>rH     s     ! , ,   " B / F & U U 9 9 2 
		H	%  $(,%II%<<% 
% <<	%
 U\\*% e_% % %%>U/		 U/p&		 &2 04',7%LL7%7% 'tn7% !%	7%
 7%z 04	A%LLA%$T3Y/A% 'tnA% 	A%H-ryy -`9"bii 9"xH299 HV "2o "2 "2J!		 !H5 5p;x- ;x| 
4+ 4 46 
:; : : 
:+ : : 
.+ . .4 
:k : : 2; 2 2#u""// # #%,, #*5<< *(5<<:P *fkfrfr *2 0		  0H3; 3;n 		  6 RYY  8 m
+ m
 m
`ryy 8 
l
4 l

l
^" "J 
T
 7 T

T
n 
]RYY ]
]@ 
y73 y7
y7x4RYY 4n 
M73 M7
M7`r4   