
    Ҍh             -       v   d dl Z d dlZd dlmZ d dlmZ d dlmZmZ d dl	m
Z
mZmZmZ d dlmZ d dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZ d d	lmZ d d
lmZm Z m!Z! d dlm"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z. d dl/m0Z0m1Z1m2Z2m3Z3m4Z4 d dl5m6Z6m7Z7 d dl8m9Z: d dl;m<Z=  ed      Z> ed      Z?ej                  j                  ZAej                  j                  ddd      ZD eEd      \  ZFZGZHde
e
e?e>f   ge
e?e>f   f   fdZIde'fdZJd ZKd ZL eIeAj                  eAj                  g       e4       dddej                  ddfd              ZP eIeAj                  j                  eAj                  j                  g       e4       d               ZT eIeAj                  j                  eAj                  j                  g       e4       dd d!              ZUd'd"ZVd# ZWd(d%ZX eIeAj                  j                        d&        ZZ eIeAj                         e4       d'               Z[ eIeAj                  j                  eAj                  j                  eAj                  j                  eAj                  j                  g       e4d(d)      d*               Z^ eIeAj                  j                  eAj                  j                  g       e4       d+               Z_d, Z`d'd-ed.eaeb   d/ecfd0Zd eIeAj                  j                  eAj                  j                  g       e4       d1               ZfdZgd.eaeb   fd2Zh eIeAj                  j                  eAj                  j                  g       e4       d3               Zj eIeAj                  j                        dd4d5       Zm eIeAj                  j                        ej                  dddd6d7       Zo eIeAj                  j                  eAj                  j                  g       e4       ej                  dddd6d8              Zq eIeAj                  j                  eAj                  j                  g       e4       ej                  dddd6d9              Zt eIeAj                  j                  eAj                  j                  g       e4       ddddd6d:              Zv eIeAj                  j                  eAj                  j                  g       e4       d-ed.eaeb   d;ebd<ebfd=              Zx eIeAj                  j                        d'd>       Zzd? Z{ eIeAj                  j                        d@        Z} eIeAj                        	 	 	 d)dAedBedCedDee   dEee   dFeej                      fdG       Z eIeAj                        	 d*dHedIedJedFeej                      fdK       Z eIeAj                        dLdLddMdAedHedIedJedFeej                      f
dN       Z eIeAj                        	 	 	 	 	 	 	 d+dOej,                  dPej,                  dDee   dQee   dFeej                      dRecdSebdTebdUebfdV       Z eIeAj                  j                        d$dWd-ed.ebdXedYej,                  dZed[ecdefd\       Z eIeAj                  j                        d$dWd-ed.ebdXedYej,                  dZed[ecdefd]       Z e4        eIeAj                  j                        d^               Z eIeAj                  j                        dddd ddd_d`edZedaee   d)ee   dbee   dcebddecdefde       Z eIeAj                   j                  eAj                   j"                  g       e4       df               Z eIeAj                   j&                        d'dg       Z eIeAj*                  j                  eAj*                  j"                  g       e4       dh               Z eIeAj*                  j&                        d'di       Z eIeAj0                  j                        dj        Z eIeAj0                  j                        dk        Z eIeAj6                  j                        dl        Z eIeAj6                  j:                        dm        Z eIeAj>                  j                        dn        Z eIeAjB                  j                        ddddddodp       Z eIeAjF                  j                        d,dq       Z eIeAjH                  j                        d)dr       Z eIeAjL                  j                        d,ds       Z eIeAjN                  j                        dt        Z eIeAjR                  j:                        du        Zd-edvefdwZd-edxedyefdzZ	 d(d{edved|ecfd}Zd-dxedved~efdZdxededecdvefdZ	 d.dededAedefdZdefdZ eIeAjd                  j                  eAjd                  jf                  g       e4dd      d/dxededecfd              Z eIeAjj                  j                  eAjl                  j                  g       e4       dAedefd              Z eIeAjp                  g       e4dd      dAefd              ZdedefdZ eIeAjv                         e4       d-edxedecdefd              Z eIeAjx                         e4       d'd-edxedecdefd              Z eIeAjz                         e4       d'd-edecdefd              Z eIeAj|                         e4       d'd-edecdefd              Z eIeAj~                  j                        d0dxedecdecfd       Z eIeAj                  j                  eAj                  j                  g       e4       dAededefd              Z eIeAj                  j                        d'dxedecfd       Z eIeAj                  j                  eAj                  j                  g       e4ddd      dddd-edecdecdeeeef   fd              Z eIeAj                  j                  eAj                  j                  g       e4       dddedededecdef
d              Z eIeAj                  j                  eAj                  j                  g       e4ddd      d$ddxedecdeeeef   fd              Z eIeAj                  j                  eAj                  j                  g       e4ddd      d$dddxedecdecdeeeef   fd              Z eIeAj                  j                  eAj                  j                  g       e4       d$dddedededecdecdefd              Z eIeAj                         e4ddd      	 	 d1dededecdecdeeeef   f
d              Zdedeececf   fdZ eIeAj                  j                  eAj                  j                  g       e4dd      d2dxededeeef   fd              Z eIeAj                  j                  eAj                  j                  g       e4dddd      dxedeeeeef   fd              Z eIeAj                  j                        	 	 	 d3dxedecdecdee   fd       Zdededeeaeb   eaeb   f   fdZdededyee   deeef   fdZdAededecfdÄZ eIeAj                        d$ddddddĜdxededecdecdee   dee   dee   dee   deeeeef   fdń       Z eIeAj                  j                  eAj                  j                  g      d$dddƜdxededecdecdecdee   defdɄ       Z eIeAj                         e4ddd$̫      	 	 	 d4d-edxedecdecdecdeeef   fd΄              Z eIeAj                  j                        dτ        Z eIeAj                         e4       	 	 d5dAedededecdecdefdЄ              Zdф Zd҄ Z eIeAj                         e4       dӄ               Z eIeAj                         e4       dԄ               ZdՄ Z eIeAj                         e4d֫      dׄ               Z eIeAj                         e4d֫      d؄               Zdل Z eIeAj                         e4       dڄ               Z eIeAj                         e4       dۄ               Z eIeAj                  j                  eAj                  j                  eAj                  j                  eAj                  j                  g       e4d֫      d܄               Zd݄ Z eIeAj                         e4       dބ               Z eIeAj                         e4       d߄               Z eIeAj                  j                  eAj                  j                  eAj                  j                  eAj                  j                  g       e4d֫      d               Z eIeAj                         e4       d6d-ededefd              Z  eIeAj                         e4       ded-edededef
d              Z eIeAj                  j                  eAj                  j                  g       e4d$̫      dLdLdd              Z eIeAj
                  j                  eAj
                  j                  g       e4       dd4d              Z eIeAj                  j                        d7d       Z eIeAj
                  j                        d7d       Z
 eIeAj                  j                  eAj                  j                  g       e4       d*d              Z eIeAj                  j                        	 	 d0d       Z eIeAj                         e4d$̫      d               Zd Zd8dZ	 d*dej,                  dBej,                  deeaeb   ebf   deeaeb   ebf   deeaeb   ebf   decdebdeeeaeb   ebf      fdZd Z eIeAj*                  j                        dej,                  dBej,                  dDeej,                     deej,                     deej,                     decdedefd       Z eIeAj.                  j                        dej,                  dBej,                  dDej,                  deaeb   deaeb   deaeb   decdeaeb   debfd       Zej2                  j4                  rej                  j                  d dd      Z eIej                  j8                  j:                  j                        d        Z eIej                  j8                  j>                  j                        d        Z ej2                  jB                  rOej                  j                  ddd      Z" eIej                  jF                  jH                        d        Z%ej                  j                  ddd      Z& eIej                  jN                  jP                  j                         eIej                  jN                  jR                  j                        d               Z* eIej                  jN                  jP                  jV                        d        Z, eIej                  jN                  jZ                  j                         eIej                  jN                  jZ                  j\                        d               Z/ eIej                  jN                  jZ                  jV                         eIej                  jN                  jZ                  j`                        d	               Z1 eIej                  jN                  jd                  j                         eIej                  jN                  jf                  j                        d
               Z4ej                  j                  ddd      Z5 eIej                  jl                  jn                        	 	 	 	 d9d       Z8 eIej                  jl                  jr                        d        Z:d Z; eIeAjx                  j                        	 	 	 	 	 d:d       Z=d Z> eIeAj~                  j                        d        Z@ eIeAj                         e4       	 	 	 	 	 d:d              ZB eIeAj                         e4d֫      d               ZD eIeAj                  j                        d        ZF eIeAj                  j                        d        ZH eIeAj                  j                        d        ZJ eIeAj                         e4d֫      d               ZLded~efdZM eIeAj                         e4dd)      d               ZO eIeAj                         e4d֫      d               ZQ eIeAj                         e4dd)      d               ZS eIeAj                         e4d֫      d               ZU eIeAj                  j,                        d*d        ZW eIeAj                  j                  eAj                  j                  g       e4       d!               ZY eIeAj                  j                  eAj                  j                  g       e4       dd"d#ebfd$              ZZ eIej                  j                  j                  j                  ej                  j                  j                  j                  g       e4       d%               Z[ eIeAj                  j,                  eAj                  j,                  g      d&        Z^ eIeAj                  j                  g      d'        Z` eIeAj                  j                  eAj                  j                  g       e4d$̫      dLdLdd(              Zb eIeAj                  j,                  g      d)        Zd eIeAj                  j                  eAj                  j                  g      ddd*d+       Zg eIeAj                  j                  g      ddd*d,       Zi eIeAj                  g       e4       d-               Zk eIeAj                  g      d.        Zm eIeAj                  g      d/        Zo eIeAj                  g      d0        Zq eIeAj                  g      d1        Zs eIeAj                  g      d2        Ztd3ebd4ebdebfd5Zud6 Zv eIeAj                  g      dDee   fd7       Zx eIeAj                  g      d8        Zz eIeAj                  g      d9        Z| eIeAj                  j                        d:        Z~ eIeAj                         e4       d;               Z eIeAj                  j                        	 	 	 	 	 	 d;d<       Z eIeAj                  j                        d=        Zd(d>Z eIeAj                  j                  eAj                  j                  g       e4       d<dd?d@              Z eIeAj                  j                  eAj                  j                  g      dA        Z eIeAj                  j&                  eAj                  j                  eAj                  j&                  eAj                  j                  eAj                  j                  eAj                  j                  g       e4d(d)      d=dB              Z eIeAj                  j                        dC        Z eIeAj"                  j                        dD        Z eIeAj&                  j                        dE        Z eIeAj*                  j,                  eAj.                  j,                  eAj*                  j,                  eAj.                  j,                  eAj0                  j                  eAj2                  j                  eAj4                  j                  g      dF        Z eIeAj8                  j,                  eAj:                  j,                  eAj8                  j,                  eAj:                  j,                  g      ddG       Z eIeAj>                  j                  eAj>                  j@                  g      dH        ZdI Z eIeAjF                  j,                  eAjF                  j,                  g      dJ        Z eIeAjJ                  j,                  eAjJ                  j,                  g      dK        Z eIeAjN                  j                        dL        Z eIeAjR                  j,                  eAjR                  j,                  g      dM        Z eIeAjV                  j,                  eAjV                  j,                  g      dN        Z eIeAjZ                  j                        dO        Z eIeAj^                  j,                         e4       ddefdP              Z eIeAjb                  g       e4       	 d>dQ              Z eIeAjf                  g      	 d>dR       Z eIeAjj                  g      	 d>dS       Z eIeAjn                  j                  eAjp                  j                  g      d'dT       Z eIeAjt                  j,                        dU        Z eIeAjx                  j                        dV        Z eIeAj|                        dW        Z eIeAj                         e4       dX               Z eIeAj                        dY        Z eIeAj                  j                        d'dZ       Z eIeAj                  j                        d[        Zǐd,d\Z eIeAj                  j                        d]        Z eIeAj                  j                         d^        Zːd_ Z̐d` Z͐da Zΐdb Z	 d'dAedcebddebdeebdfebdgebdhebdiebdjebdkebdlebdmebdnebdoebdpebdqebdrebdsebdtebduebdedvecf,dwZАdx ZdAededcebddebdeebdfebdgebdhebdiebdjebdkebdlebdpebdqebdrebdsebdtebduebdef&dyZҐdz Z eIeAj                  j                        d{        Z eIeAj                  j                        	 	 	 	 d9d|       Z eIeAj                  j                        d}        Z eIeAj                         e4dd)      	 	 	 	 d9d~              Z eIeAj                         e4d֫      d               ZdAedefdZ G d de      ZdAededebfdZ eIeAj                  j                        d        Z eIeAj                         e4       d               Z eIeAj                         e4d֐d      d               Z eIeAj                  j                  g      d        Z eIeAj                  j                        	 	 	 	 	 d?d       Z eIeAj                  j                  eAj                  j                  g       e4       ddddddd              Z eIeAj                  j                  eAj                  j                  g       e4       ddddddd              Z eIeAj                  j                        d        Z eIeAj                  j                        d        Z eIeAj                  j                        d@d       Zd(d.ebdebdecfdZd Zd Z eIeAj                  j                        d'd       Zd'dZd*dZd Zd*dZdAdZ eIeAj                  j                        d        Z eIeAj                        d        Z eIeAj                  j                   eAj                  j                  eAj                  j                  eAj                  j                  g       e4       d*d              Z eIeAj                  j                   eAj                  j                  eAj                  j                  eAj                  j                  g      d*d       Z eIeAj                  g      	 	 	 	 dBdededededecdecdee   fd       Zdedeebdf   fdZ eIeAj                  g      	 	 	 	 dBdedededee   decdedecdecdee   fd       Z
 eIeAj                  g      	 	 	 	 	 dCdedededee   dedecdecdee   fd       Z eIeAj                  g      	 d*dededededededededebdebdedecdededee   fd       Z eIeAj                  g      	 	 	 	 dDdededededecdee   dee   fd       Z eIeAj"                  g      	 	 d,dedededededededecdee   dee   fd       Z eIeAj&                  g      	 	 	 dEdedededee   decdecdee   fd       Z eIeAj*                  g      	 	 dFdededededee   dedededededeaec   decdee   fd       Z eIeAj.                  g      	 d*dedededededededededededebdebdedecdee   f d       Z eIeAj2                  g      	 	 	 	 	 d?dedededee   dee   debdebdedecdecdee   deeb   deeb   dee   dee   fdĄ       Z eIeAj6                  g      	 	 	 d)dededededededededebdebdedecdededee   deeb   deeb   f"dń       Z eIeAj:                  g      	 	 	 	 	 dGdedededDee   dee   dee   deeb   deeb   dedebdecdee   dee   dee   deeb   fd΄       Z eIeAj>                  g      	 	 	 dAdededededDee   dee   dee   dej@                  dej@                  dededededebdecdee   deeb   decf$d҄       Z! eIeAjD                  j                  g      	 	 	 	 dHd-ej,                  dJej,                  dej,                  dej,                  dDeej,                     deej,                     dFeej                      decfdׄ       Z# eIeAjH                  jJ                  eAjH                  jL                  g       e4       d(d؄              Z' eIeAjP                  jJ                        d(dل       Z) eIeAjT                  j                  eAjT                  j                  g       e4       d'dd4dڄ              Z+dۄ Z,d܄ Z- eIeAj\                  j                  eAj^                  j                  g      d*d݄       Z. eIeAj`                  j                  eAjb                  j                  g      d,dބ       Z0 eIeAjd                  j                  eAjf                  j                  g      	 	 d,dedeeebej@                  f      deeebej@                  f      dee   dee   f
d       Z2 eIeAjh                  j                  eAjj                  j                  g      d)d       Z4 eIeAjl                  j                  eAjl                  jn                  eAjl                  j                  eAjl                  jp                  g      dId       Z9d Z: eIeAjv                  j                        	 	 d,d       Z< eIeAjz                  j                        d        Z= eIeAj|                  j                        d        Z>d Z?d Z@ eIeAj                  j                  eAj                  j                  g      d<d       ZC eIeAj                  j                        dJd       ZD eIeAj                  j                        dKd       ZF eIeAj                         e4       	 dLd              ZH eIeAj                  j                  eAj                  j                  g       e4d(d)      d=d              ZJej                  ZLd ZM eIeAj                  j                        d        ZN eIeAj                  j                        d        ZO eIeAj                  j                        d        ZQ eIeAj                  j                        d        ZR eIeAj                  j,                  eAj                  j                  g       e4       dddd              ZU eIeAj                  g       e4       dMd              ZW eIeAj                  j                  eAj                  j                  g      	 	 d,d       ZZ eIeAj                  j                  g      	 	 d,d       Z\ eIeAj                  j                        d        Z] eIeAj                  j                  eAj                  j                  g       e4       d)d              Z^ eIej                  j                  j                        d        Z_ eIej                  j                  j                        d        Z` eIeAj                         e4       dddddd               Zbd Zc eIeAj                        d        Ze eIeAj                        	 dNd       Zg eIeAj                        	 dNd       Zi eIeAj                        	 dNd       Zk eIeAj                         e4       dddd              Zm eIeAj                         e4       debd-edefd	              Zo eIeAj                        d-efd
       Zq eIeAj                         e4d$̫      d-edefd              Zr eIeAj                         e4       d-edefd              Zsd Zt	 	 	 	 	 dOdededeej,                     deej,                     dee   dDee   deej,                     dFeej                      decfdZu eIeAj                         e4       	 	 	 d)dededee   dDee   dFeej                      defd              Zw eIeAj                  j                  g      	 	 	 	 	 dOdej,                  dej,                  dej,                  dej,                  deej,                     dDeej,                     deej,                     dFeej                      decfd       Zy eIeAj                         e4       ded.ebdecdefd              Z{ eIeAj                         e4       dd              Z} eIeAj                         e4       	 	 	 dPdBed)edebdecdecdefd              Z~ eIeAj                  j                        	 dQd(edbeae   deaeb   defd       Zd Zd  Z eeAj                          eeAj                          eeAj
                          eeAj                          eeAj                          eeAj                          eeAj                          eeAj                          eeAj                          eeAj                          eeAj                          eeAj                          eeAj                          eeAj                           eeAj"                          eeAj$                          eeAj&                          eeAj(                          eeAj*                          eeAj,                          eeAj.                         d! Z eIeAj2                         e4       d"               Z eIeAj4                         e4       dLd#d$              Z eIeAj6                         e4       dLd#d%              Z eeAj2                        Z eeAj4                        Z eeAj6                        Zd dl5Zd dlZd dlZd& Z e        y(R      N)Sequence)Enum)reducewraps)CallableOptionalTypeVarUnion)	ParamSpec)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KINDview_of)BoolLikecorresponding_complex_dtypecorresponding_real_dtypedefinitely_contiguouselementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KIND	FloatLikeIntLikeis_contiguousmake_contiguous_strides_forNumbersuggest_memory_format
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_config)_pytree_T_PatenIMPLMeta   returnc                       fd}|S )Nc                 V     t                 fd}t        j                  |        S )Nc                 (    t        t        |        y N)r   r   )opfns    X/var/www/html/aiagenthome/venv/lib/python3.12/site-packages/torch/_meta_registrations.pyregisterz0register_meta.<locals>.wrapper.<locals>.registerA   s    
B3    )r   pytree	tree_map_)r9   r;   r8   s   ` r:   wrapperzregister_meta.<locals>.wrapper>   s)     $	4 	2&	r<    )r8   r?   s   ` r:   register_metarA   =   s     Nr<   type_promotionc                     t        j                  |d| i\  }}|D cg c]  }t        ||       }}t        | }t	        |dt
        j                  iS c c}w )Ntype_promotion_kindrB   )utilsr   r$   r*   r   r   DEFAULT)rB   args_result_dtypexs        r:   elementwise_metarK   J   st    
 ..	*OA| ?CCd#A|4dDC T"D "	BJJ  Ds   Ac                     t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  i}|j                  | |       S r7   )torch	complex32halfcfloatfloatcdoubledoubleget)dtypefrom_complexs     r:   toRealValueTyperW   ^   sE    ekku||L
 E5))r<   c                 l     t        t         g|       t        j                   k(   fd       y )Nc                      d d  S )Nzoutput with shape z# doesn't match the broadcast shape r@   )broadcasted_shape
self_shapes   r:   <lambda>z)check_inplace_broadcast.<locals>.<lambda>k   s    $ZL0STeSfgr<   )tupler)   rM   _check)r[   
args_shaperZ   s   ` @r:   check_inplace_broadcastr`   g   s0    /
HZHI	LLZ'gr<   Fc	                 <   	 t         t        j                        r(t        j                   j	                         dk(  d        t        t        j                        r(t        j                  j	                         dk(  d        t        d  fD              rZt        j                  t        j                               		nFt        j                  t        j                        	fd       nxs t        j                         t        t        j                        sJ t        j                  t        t               fd       t        t              sJ t        j                  dk\  d        t        j                  f|d||	      S )
Nr   c                       yNz:linspace only supports 0-dimensional start and end tensorsr@   r@   r<   r:   r\   z(meta_linspace_logspace.<locals>.<lambda>       Pr<   c                       yrc   r@   r@   r<   r:   r\   z(meta_linspace_logspace.<locals>.<lambda>   rd   r<   c              3   <   K   | ]  }t        |t                y wr7   )
isinstancecomplex).0args     r:   	<genexpr>z)meta_linspace_logspace.<locals>.<genexpr>   s     
C/B:c7#/Bs   c                      d  d S )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r@   )default_complex_dtyperU   s   r:   r\   z(meta_linspace_logspace.<locals>.<lambda>   s    56K5LLrsxryzr<   c                      dt              j                   dt               j                   dt              j                   dS )Nz4received an invalid combination of arguments - got (, ))type__name__)endstartstepss   r:   r\   z(meta_linspace_logspace.<locals>.<lambda>   sB     u+r$s),,-RU0D0D/EQHr<   c                       y)Nz$number of steps must be non-negativer@   r@   r<   r:   r\   z(meta_linspace_logspace.<locals>.<lambda>   s    %Kr<   metarU   layoutdevice
pin_memoryrequires_grad)rg   rM   r   r^   dimanyrE   r   get_default_dtypeis_complex_dtyperU   _check_typer   empty)
rt   rs   ru   baserU   rz   ry   r{   r|   rm   s
   ``` `    @r:   meta_linspace_logspacer   o   sL    %&IIK1P	
 #u||$GGINP	

 
CsE/B
CC % A A##%!
 =)ELL&&u-z
 2002eU[[))) 
5'"	H
 eW%%%	LL!KL;;	# r<   c                    t        j                  j                  t         j                  k(  fd       t        j                  | j                         dk(  xr j                         dk7   d        | j                  j                        S )Nc                  "    d j                    S )Nz2take(): Expected a long tensor for index, but got rU   indexs   r:   r\   zmeta_take.<locals>.<lambda>   s    DU[[MRr<   r   c                       y)Nz*take(): tried to take from an empty tensorr@   r@   r<   r:   r\   zmeta_take.<locals>.<lambda>   s    <r<   )rM   r^   rU   long_check_indexnumel	new_emptyshape)selfr   s    `r:   	meta_taker      sm     
LLuzz!R
 
ZZ\Q55;;=A#56< >>%++&&r<   r}   c                T     j                   }j                   }t        j                  ||k(  d        t        j                   j                        dk(  xr j                        dk(   fd       t	         j
                  j
                        } j                  |      S )Nc                       y)Nz=linalg.cross: inputs must have the same number of dimensions.r@   r@   r<   r:   r\   zlinalg_cross.<locals>.<lambda>       Or<   r2   c                  V    d  dj                          dj                          S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and size)r}   otherr   s   r:   r\   zlinalg_cross.<locals>.<lambda>   s1    -cU 399S>"%

3'8:r<   )ndimrM   r^   r   r)   r   r   )r   r   r}   x_dy_d	out_shapes   ```   r:   linalg_crossr      s     ))C
**C	LLs
O 
LL		#!4

31 4	
 "$**ekk:I>>)$$r<   c                 r   ddl mmm} fd}fd}t	        |       dk(  rdgt	        |      z  S t        t        j                  | d      } ||dk(        }|r | || |            r|S dgt	        |      z  }	|rUt        t	        |      dz
  dd      D ]7  }
|
t	        |      dz
  k(  rd|	|
<   t        ||
dz      d      |	|
dz      z  |	|
<   9 |	S t	        |      dz
  }
|d   }d}d}t        t	        |       dz
  dd      D ]  }|| |   z  }|dk(  s' || |dz
     dk7        s" |||dz
     ||z  k7        s7|
dk\  rN |||k        s |||
   dk(        r5||z  |	|
<   |||
   z  }|
dz  }
|
dk\  r |||k        r& |||
   dk(        r5 |||k7        r y |dkD  s||dz
     }d}d} |
dk7  ry |	S )Nr   )guard_or_falseguard_or_truesym_eqc                     r |       S | S r7   r@   )rJ   r   size_obliviouss    r:   maybe_guard_or_falsez-_compute_stride.<locals>.maybe_guard_or_false   s    !!$$r<   c                     r |       S | S r7   r@   )rJ   r   r   s    r:   maybe_guard_or_truez,_compute_stride.<locals>.maybe_guard_or_true   s     ##r<      r   )
%torch.fx.experimental.symbolic_shapesr   r   r   lenr   operatormulrangemax)	old_shape
old_stride	new_shaper   r   r   r   r   
zero_numel
new_strideview_dchunk_base_stridetensor_numel
view_numeltensor_dr   r   s      `           @@r:   _compute_strider      s2     9~sS^##8<<A.E%eqj1J*6)Y+GHs9~%JC	NQ.B7FY!++%&
6" 	&1*-q1Jvz4JJ 6"	 8 ^aF"2LJ#i.1,b"5	(++q=	(Q, 71 <=#8a<(L;L,LL A+#J$=>'	&(9Q(>?%/2C%C
6"i//
! A+#J$=>'	&(9Q(>? #:#=>!|$.x!|$<! 
/ 60 |r<   c                     ddl m t        fd| j                         D              xs8 t        fd| j	                         D              xs t        fd|D              S )Nr   )has_hintc              3   0   K   | ]  } |         y wr7   r@   ri   sr   s     r:   rk   z+_view_has_unbacked_input.<locals>.<genexpr>  s     .XOX   c              3   0   K   | ]  } |         y wr7   r@   r   s     r:   rk   z+_view_has_unbacked_input.<locals>.<genexpr>  s     3
18A;
r   c              3   0   K   | ]  } |         y wr7   r@   r   s     r:   rk   z+_view_has_unbacked_input.<locals>.<genexpr>  s     .18A;r   )r   r   r~   r   stride)ar   r   s     @r:   _view_has_unbacked_inputr     sM    > 	.QVVX.. 	/3
33	/...r<   Tc                 (    ddl m}m} t        j                  d      t        j
                   j                                j                  dk(  rR }D ]:  }t        j                  |dk(         t        j                  j                  |d      }< | u rt               S |S t              dk(  r\ } j                  D ]:  }t        j                  |dk(         t        j                  j                  |d      }< | u rt               S |S t!        t"        j$                  d      }t        j                   j                         |k(   fd       t              t         j                        k(  r$ | | j                              rt               S |rt'               r3nt)               r't        j*                        } j-                  |      S t/         j1                          j3                         |      }	|	 j-                  |	      S |rHt        j4                  j6                  j8                  j:                  st=               rt?         d	      S d
 j                   d j3                          d d}
tA        |
      )Nr   )r   r   F)validater   r   c                  *    d j                    d dS )Nz&Could not reshape a tensor with shape  as a tensor with shape !r   r   r   s   r:   r\   z%_view_unbacked_meta.<locals>.<lambda>E  s    8	AYZ_Y``abr<   )r   )size_oblivious_enabledz Cannot view a tensor with shape z and strides r   r   )!r   r   r   rE   extract_shape_from_varargs
infer_sizer   r   rM   r^   _refs	unsqueezer   r   r   squeezer   r   r   r   r   r    
as_stridedr   r   r   fxexperimentalr+   backed_size_obliviousr   _view_unbacked_meta
ValueError)r   r   r   r   r   _alengthshape_numelstridesnew_stridesmsgs   ``         r:   r   r   !  s"   L ,,UUCE UAGGI.E 	vv{FLL1%&&r2.B  71:I 5zQggFLL1%$$R,B  71:Iua0K	LL		[ b
 5zS\!nVE1775K&Lqz#9Q}Q?O33E:||E7++!	!((*e4JK ||E;// %%;;#Au-"1eEJJ,QWWI]188:,Nfglfmmn
oC
S/r<   c                     t         j                  j                  j                  j                  st        | |      rt        | |      S t        j                  j                  | g|ddiS )N
allow_copyF)	rM   r   r   r+   r   r   r   r   _reshape_view_helperr   s     r:   
_view_metar   e  sV    xx$$::>V	5? #1e,,{{//LELeLLr<   c                 |    t        | d       t        | d       t        j                  | t        j                        S )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexrM   
empty_likecontiguous_formatr   s    r:   linalg_matrix_expr   o  s3     d/04!45D0G0GHHr<   valuesindicesc                 Z   t        j                  | j                  | j                  | j                        }t        j                  | j                  | j                  t         j
                        }| j                         dk7  r%| j                  dk7  rt        || j                         ||fS )Nrz   rU   r   )	rM   r   r   rz   rU   int64r   r   maybe_wrap_dim)r   r}   r   r   s       r:   	cummaxminr   w  sp    
 [[DKKtzzJFkk$**T[[LGzz|qTYY!^sDII&7?r<   c                 x    t        || j                         t        j                  | t        j                        S Nr   )r   r   rM   r   r   )r   r}   s     r:   logcumsumexpr     s+     3		"D0G0GHHr<   c                D   |j                   }t        |      }||z
  }t        t        |            }t        |      D 	cg c]  }	d }
}	|D ]  }d|
|<   	 g g }}|D ]*  }|
|   s|j	                  |       |j	                  |       , ||z   }t        |      }|j                         |d | }|j                  fdd       |||d  z   }|j                  |      }dgt        |j                  |d        z   }|j                  |      }|j                  d      }||d<   t        |      }t        t        |            D ]  }|||      ||dz   <    | j                  |t        j                         t        |      D 	cg c]  }	d }}	d}|dz
  }|dk\  r0|| j                  d      z  |||   <   ||||      z  }|dz  }|dk\  r0t        ||      D ]  }| j                  d||z
  z         |||   <   ! | j                  ||| j                                | S c c}	w c c}	w )	NFTc                     |    S r7   r@   )rJ   self_stridess    r:   r\   z_exec_fft.<locals>.<lambda>  s	    <?r<   keyreverser   r   r   r   )r   r   listr   appendr   sortpermuter   reshaper   resize_rM   r   as_strided_storage_offset)outr   	out_sizesr}   forwardr   signal_ndim
batch_dimsdim_permuterH   is_transformed_dimdleftright	batch_endtmpinputbatched_sizes
batch_sizebatched_out_sizesiout_stridesbatch_numelr   s                          @r:   	_exec_fftr    sF   99Dc(K#J uT{#K).t5A%5 $1  b%D!!$KKNLLO	 
 ,KD	I;;=L
jy
!CHH*DH9IJ//KLL%E D4JK 899MMM-(EAJ!M!]+3s8_#,SV#4!a%  KK!1H1HKI $Dk*k1kK*KQA
q&&1CJJqM&AKN#yQ00	Q q& :t$&)jja*n1E&FKN# %OOI{C,>,>,@AJW 6@ +s   	H 	Hr   r}   exclude_lastc                     t        |      }| j                         |d t        |      t        |      z
   j	                  fd       |S )Nc                     |    S r7   r@   )r  r   s    r:   r\   z_sort_dims.<locals>.<lambda>  s	    l1or<   )r   )r   r   r   intr   )r   r}   r  sorted_dimsr   s       @r:   
_sort_dimsr    sL    s)K;;=L6#k"S%667<<% =  r<   c                 
   t        j                  | j                  j                         |s| j	                         S t        | |      }| j                  | j                               }t        || | j                         ||      S )Nr  )	rM   r^   rU   
is_complexcloner  r   r   r  )r   r}   normalizationr  r  r  s         r:   meta_fft_c2cr$    sb     
LL&&'zz|T3'K
..
%CS$		['JJr<   c                 f    t        |       t        kD  st        |       dk\  r| d   dk(  r	| d   dk(  ryy)N   r   r   FT)r   cufft_max_ndimr   s    r:   use_optimized_cufft_pathr(    s3    
3x. SX]s1v{s1vQR{r<   c                 z   t        j                  | j                  j                         t	        | j                               }t	        |      }|d   }||   dz  dz   }t	        |      }|||<   |r|||<   t        |       dk(  st        |       dk(  rz| j                  |t        j                  | j                              }	| }
t        |       dk(  rt        |      rt        |	|
||d       nt        |      dk(  r|n|}t        |	|
||gd       t        |      dkD  r0| j                  |t        j                  | j                              }
|d d }|rx|
|	}
}	|
j                         |j                  fd	d
       t        t         t        |            }|t        |      |z
  d  }t        |	|
||d       |d t        |      |z
   }|rx|s:|	j                  |      ||   k7  r#|
j#                  |t         j$                         |
}	|	S | j                  |t        j                  | j                              S )Nr   r&  r   cudaxpur   Tr   c                     |    S r7   r@   )r  r   s    r:   r\   zmeta_fft_r2c.<locals>.<lambda>  s	    '!*r<   r   r   )rM   r^   rU   is_floating_pointr   r   device_hintr   rE   r   r(  r  r   r   r   minr'  r  r   )r   r}   r#  onesidedinput_sizesr  last_dimlast_dim_halfsizeonesided_sizesoutputworking_tensortarget_sizesr  max_dims	last_dimsr   s                  @r:   meta_fft_r2cr:    s>    
LL--.tyy{#K[!I2wH#H-2Q6+&N0N8/	(4F"k$&75&@ U>>tzzJ   
 t&+CC+HfnidK ),CA9>LfnlXJPTU3x!|!%U%F%Ftzz%R "0 "
 cr(K)7(//1  ,d !  ~s;/?@'K(88(C(EF	NNIt **GC,<x,GH  {{8$	((;;&&y@W@W&X' ~~U>>tzzJ  
 	
r<   )	generatorc                B    t        |t        j                  | g            S r7   )r%   rM   Size)nr;  r  s      r:   meta_randpermr?  %  s    S%**aS/22r<   rU   ry   rz   r{   c                6    t        j                  | ||||      S Nr@  rM   r   )r>  rU   ry   rz   r{   s        r:   meta_randperm_defaultrD  *  s      ;;	vf r<   c                x     dt        j                   kD   fd       t        j                  |||||      S )Nr   c                      d d  S Nz:random_ expects 'from' to be less than 'to', but got from=z >= to=r@   highlows   r:   r\   zmeta_randint.<locals>.<lambda>F      LSEQXY]X^_r<   r@  rM   r^   r   )rI  r   rU   ry   rz   r{   rJ  s   `     @r:   meta_randintrM  8  s>     C	LLs
_ ;;E&J r<   c                t     t        j                   kD   fd       t        j                  |||||      S )Nc                      d d  S rG  r@   rH  s   r:   r\   z"meta_randint_low.<locals>.<lambda>[  rK  r<   r@  rL  )rJ  rI  r   rU   ry   rz   r{   s   ``     r:   meta_randint_lowrP  M  s9     
LLs
_ ;;E&J r<   c                6    t        j                  | ||||      S rB  rC  )r   rU   ry   rz   r{   s        r:   meta_rand_defaultrR  b  s      ;;E&J r<   r#  lastdimc                    t        j                  | j                  j                         t	        |       dk(  rt        | j                               }|||d   <   | j                  |t        | j                              }t        |      r.t        || j                  t         j                        ||d      S t        |      dkD  rt        | |d d d|      }n | j                  t         j                        }t        ||||d   gd      S | }t        |      dkD  r|d d }t        | ||d      }|dd  }t        |j                               }|||d   <   | j                  |t        | j                              }	t        |	|||d      S )	Nr*  r   r   r   Fr   r   r   )rM   r^   rU   r!  r.  r   r   r   rW   r(  r  r"  r   r   r$  )
r   r}   r#  rS  r  r5  tempr  c2c_dimsr  s
             r:   meta_fft_c2rrW  j  sk    
LL&&'4F"%	$	#b'	1LM#C(

)@)@
A  3x!|#D#cr(Aw?zz0G0GzHVT9s2wiOO s8a<3BxH xNEbc(C&	$	#b'nnYodjj.InJeYUCCr<   c                 J   ddl m}  ||       s#t        j                  |       dk(  rt	        d      t        |t              ra|j                  | |      }| j                         |j                         k7  r.t        j                  j                  || j                                | S )Nr   )free_unbacked_symbolsr   zQmore than one element of the written-to tensor refers to a single memory location)r   rY  rM   _debug_has_internal_overlapRuntimeErrorrg   r   tor   r/   expand_copydefault)r   srcnon_blockingrY  intermediates        r:   
meta_copy_rb    s     L "$'E,M,Md,SWX,X_
 	
 #vvvdL199;,++--$$\499;?Kr<   c                     t        | j                               }t        | j                               }|| j                         k\  rdn
||   ||   z  }|j	                  |d       |j	                  ||       ||fS Nr   )r   r   r   r}   insert)tensorr}   result_sizesresult_stridesr   s        r:   inferUnsqueezeGeometryri    sq    &L&--/*NVZZ\)|C/@>RUCV/VJQ#z*''r<   c                     t        || j                         dz         }t        | |      \  }}| j                  ||       | S rd  )r   r}   ri  r  )r   r}   g_sizes	g_stridess       r:   meta_unsqueeze_rm    s>    
dhhj1n
-C/c:GYWi(Kr<   r  weight_metabias_activation_opt	out_dtypec                 8   t        | j                        }|*|j                  d      |j                  d      k(  sJ d       |j                  d      | j                  d      dz  k(  sJ |j                  d      |d<   t        | j                        dk(  sJ d       d| j                  d      f}|7| j                  t
        j                  k(  r|t
        j                  k(  sJ d       | j                  ||| j                  n|      j                  ||      }|S )	Nr   zoutput size mismatchr   r   r&  z*we can only handle the squashed input case9out_dtype is only supported for i8i8->i32 linear operatorr   )
r   r   r   r   rU   rM   int8int32r   r   )	r  rn  ro  rp  rq  rr  output_sizestransposed_stridesr5  s	            r:   meta_sparse_structured_linearry    s    $L{{1~1-E/EE-;;q>UZZ^a////{{1~L u{{q N"NN UZZ]+{{ejj(Y%++-E 	
G	
E __&.ekkI   j12 
 Mr<   mat1	mat1_metamat2c                    t        | j                        dk(  sJ t        |j                        dk(  sJ t        |j                        dk(  sJ | j                  d      |j                  d      dz  k(  sJ | j                  d      |j                  d      g}|7|j                  t        j
                  k(  r|t        j                  k(  sJ d       |j                  |||j                  n|      }|S )Nr&  r   r   rt  r   r   r   r   rU   rM   ru  rv  r   )rz  r{  r|  rr  rw  r5  s         r:   meta_sparse_structured_mmr    s     tzz?ay1$$$tzz?a99Q<499Q<!++++IIaL$))A,/LzzUZZ'I,D 	
G	
D ^^%-djj9  F
 Mr<   r   )alphabetarr  c                |   t        | j                        dk(  sJ d       t        |j                        dk(  sJ t        |j                        dk(  sJ t        |j                        dk(  sJ | j                  d      |j                  d      k(  sJ d       |j                  d      |j                  d      dz  k(  sJ |j                  d      |j                  d      g}|7|j                  t        j
                  k(  r|t        j                  k(  sJ d       |j                  |||j                  n|      }|S )Nr   zEonly input broadcasted to columns of mat1 * mat2 product is supportedr&  r   rt  r   r~  )	r  rz  r{  r|  r  r  rr  rw  r5  s	            r:   meta_sparse_structured_addmmr    s/    u{{q  O  tzz?ay1$$$tzz?a::a=DIIaL( O( 99Q<499Q<!++++IIaL$))A,/LzzUZZ'I,D 	
G	
D ^^%-djj9  F
 Mr<   compressed_Adense_Br  transpose_resultalg_idsplit_ksplit_k_modec	                 L   |j                   t        j                  t        j                  t        j                  t        j
                  t        j                  hv sJ d       | j                   |j                   k(  sJ d       t        |j                        dk(  sJ d       | j                   t        j
                  t        j                  fv }	|	rdnd}
|	r|j                         rJ d       |j                  d      }|j                  d	      }| j                         d
z  |
|z  z  }|||j                  d      k(  sJ |I|	r@|t        j                  t        j                  t        j                  t        j                  hv sJ d       |r||fn||f}|j                  ||      S )Nz;_cslt_sparse_mm only supports fp16, bf16, int8, and fp8e4m3zinputs must have the same dtyper&  z'_cslt_sparse_mm only supports 2d inputs
   	   z.dense input must be transposed for 8bit dtypesr   r      z\out_dtype is not supported for {compressed_A.dtype} x {dense_B.dtype} -> {out_dtype} matmul!r   )rU   rM   float32float16bfloat16ru  float8_e4m3fnr   r   r   r   r   rv  r   )r  r  rp  r  rr  r  r  r  r  is_8bit_input_typecompression_factorkr>  moutput_shapes                  r:   meta__cslt_sparse_mmr    s    ==

  E EE  .Q0QQ.w}}"M$MM"%++

E<O<O/PP1q((* 	
<	
* 	QAQA					"(:Q(>?ADIIaL   !iMMNNKK	4
 '
 	
 k	
 
 .Aq6Aq6L\;;r<   )include_selfr   sourcer   r  c                L    t        j                  | t         j                        S r   rM   r   r   r   r}   r   r  r   r  s         r:   meta_index_reducer  L  s     D0G0GHHr<   c                    | S r7   r@   r  s         r:   meta_index_reduce_r  Y  s	     Kr<   c                     t        | j                               }| j                         dkD  r|j                         ||<   | j	                  |      S Nr   )r   r   r}   r   r   )r   r}   r   result_sizes       r:   meta_index_selectr  g  s@     tyy{#KxxzA~ ;;=C>>+&&r<   )lengthsr   offsetsaxisunsafeinitialdatar  r  r  r  c                     |t        d       fd}| ||j                        S |+|j                  d d |j                  d   dz
  fz   }	 ||	      S t        d      )Nz?segment_reduce(): indices based reduction is not supported yet.c                     t        j                  | j                  dz   d  z   j                  dt         j                        S )Nr   rw   rU   rz   r   )rM   r   r   rU   r   )lengths_shaper  r  s    r:   segment_reduce_lengths_tensorz:meta_segment_reduce.<locals>.segment_reduce_lengths_tensor  s>    {{DJJtaxz22**11	
 	
r<   r   r   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorr   r[  )
r  r   r  r   r  r  r  r  r  r  s
   `    `    r:   meta_segment_reducer  p  s|     !M
 	

 ,W]];; cr*gmmB.?!.C-EE,];;
U
VVr<   c                 $    | j                  d      S Nr@   r   r   s    r:   meta_maxr         >>"r<   c                     t        j                  | j                  |f      }t        | ||      }| j	                  |      | j	                  |t
        j                        fS Nr   rE   reduction_dimsr   _compute_reduction_shaper   rM   r   r   r}   keepdimr  s       r:   meta_max_dimr    R    


tzzC6
2C+D#w?L|$|5::6 r<   c                 $    | j                  d      S r  r  r   s    r:   meta_minr    r  r<   c                     t        j                  | j                  |f      }t        | ||      }| j	                  |      | j	                  |t
        j                        fS r  r  r  s       r:   meta_min_dimr    r  r<   c                     | j                         rt        | j                        }nt        | t        j
                        \  }}t        j                  | |      S NrD   r   )r!  r   rU   r   r   INT_TO_FLOATrM   r   )r   rI   rH   s      r:   
meta_angler    sI    /

;, ? L L
< D55r<   c                     t        j                  || j                         | j                         |j	                  t        j
                  |             S r7   )rM   _resize_output_r   rz   copy_angle)r   r  s     r:   meta_angle_outr    s6    	#tyy{DKK899U[[&''r<   c                      y r7   r@   )vals    r:   assert_asyncr        
r<   c                      y r7   r@   )r  
assert_msgs     r:   assert_async_metar    r  r<   c                      y r7   r@   )r   s    r:   
print_metar    r  r<   rU   ry   rz   r{   r   c                 0    t        j                  dd      S )Nr   rw   rz   rC  r  s        r:   make_dep_tokenr    s     ;;q((r<   c                 h    ddl m} t        | t        t        f      rt        d       || ||       y )Nr   )constrain_range'Constraining SymFloat or Symbool is nyir/  r   )r   r  rg   r   r   r   )r   r/  r   r  s       r:   sym_constrain_ranger    s/     F$7+,BCCDcs+r<   c                 6    t         j                  | ||       |S Nr  )r/   r  r   r/  r   	dep_tokens       r:   functional_sym_constrain_ranger    s    Ts4r<   c                 (   ddl m} ||t        j                  |        y t	        | t
        t        f      rt        d      t        |       t        u r5|t        j                  | |k\         |t        j                  | |k         y  || ||       y )Nr   )_constrain_range_for_sizer  r  )r   r  rM   _check_is_sizerg   r   r   r   rq   r  r^   )r   r/  r   r  s       r:   sym_constrain_range_for_sizer    s     P
{s{T"$7+,BCCDzS?LL%?LL%d5r<   c                 6    t         j                  | ||       |S r  )r/   r  r  s       r:   'functional_sym_constrain_range_for_sizer    s    %%d%=r<   c                     |S r7   r@   )r  r  r  s      r:   functional_assert_async_metar    s    r<   f_namec                     | j                         dk\  s
J | d       | j                  d      | j                  d      k(  s.J | d| j                  d       d| j                  d       d       y )Nr&  z3: The input tensor must have at least 2 dimensions.r   z5: A must be batches of square matrices, but they are  by 	 matrices)r}   r   )r   r  s     r:   r   r     s}    88:? (EF? 99R=DIIbM) (G		RTVZ[_[d[deg[hZiirs)r<   Anamec                     t        j                   j                  j                  k(   fd       t        j                   j                  j                  k(   fd       t        j                  j	                  d      j	                  d      k(  fd       t        j                  j	                  d       j	                  d      k(   fd       y )Nc                  >    dj                    d j                    dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.r  r  r   s   r:   r\   z(linearSolveCheckInputs.<locals>.<lambda>   s     H{{m:ahhZy:r<   c                  >    dj                    d j                    dS )Nz=Expected b and A to have the same dtype, but found b of type z and A of type r  r   r  s   r:   r\   z(linearSolveCheckInputs.<locals>.<lambda>(  s     Kzzl/!'')=r<   r   r  c                  R    d j                  d       d j                  d       dS )Nz3A must be batches of square matrices, but they are r  r  r   r  r   r  s   r:   r\   z(linearSolveCheckInputs.<locals>.<lambda>0  s+    FF2J<tAFF2J<yBr<   c                      d d j                  d       d j                  d       dj                  d       dj                  d       
S )NzIncompatible matrix sizes for z: each A matrix is r   r  z but each b matrix is r  r   )r  r  r   s   r:   r\   z(linearSolveCheckInputs.<locals>.<lambda>8  sM    ,TF 3D$TYYr]O4		"Hr<   )rM   r^   rz   rU   r   )r   r  r  s   ```r:   linearSolveCheckInputsr    s    	LLqxx	
 
LL

agg	
 
LL	r
affRj 	
 
LL	r
diim#	
r<   tallow_low_precision_dtypesc                 J   | j                   t        j                  | j                         xs | j	                         fd       |sYt        j                  t        j
                  t        j                  t        j                  t        j                  fv fd       y y )Nc                       d  S )Nz<: Expected a floating point or complex tensor as input. Got r@   rU   r  s   r:   r\   z(checkFloatingOrComplex.<locals>.<lambda>I  s    6(VW\V]^r<   c                       d  S )Nz*: Low precision dtypes not supported. Got r@   r   s   r:   r\   z(checkFloatingOrComplex.<locals>.<lambda>N  s    vhHPr<   )	rU   rM   r^   r-  r!  rQ   rS   rP   rR   )r  r  r  rU   s    ` @r:   r   r   A  sn    
 GGE	LL	/^ &ekk5<<u}}MMP	
 &r<   arg_namec                 ^    t        j                  | j                         dk\  fd       y )Nr&  c                       d  dS )Nz: The input tensor z! must have at least 2 dimensions.r@   )r  r  s   r:   r\   zcheckIsMatrix.<locals>.<lambda>V  s    6(-hZ7XYr<   )rM   r^   r}   )r  r  r  s    ``r:   checkIsMatrixr  S  s    	LL	1Yr<   Br  c                      t                t               t        j                  r# j	                  d      j	                  d      k(  n" j	                  d      j	                  d      k(   fd       y )Nr  r   c                       drdnd d j                  d       d j                  d       dj                  d       dj                  d       d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (r  rJ   r   r   rp   r   )r  r  r  r  s   r:   r\   z#checkInputsSolver.<locals>.<lambda>_  sV    hHxX.AaffRj\qvvbzl!AFF2J<qJr<   )r   r  rM   r^   r   )r  r  r  r  s   ````r:   checkInputsSolverr	  Z  sY    a !V	LL$(r
affRj affRjAFF2J.F	
r<   resultfn_nameresult_namec                 r     t        j                  j                  j                  k(   fd       y )Nc            	      L      d d dj                    dj                    	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on r  )r  r  r
  r  s   r:   r\   z!checkSameDevice.<locals>.<lambda>o  s0    i{;-/dm4nU\\NLr<   )rM   r^   rz   )r  r
  r  r  s   ````r:   checkSameDevicer  g  s&     
LL%	
r<   UPLOc                       j                         }t        j                  t               dk(  xr |dk(  xs |dk(   fd       y )Nr   ULc                      d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r@   )r  s   r:   r\   zcheckUplo.<locals>.<lambda>z  s    CD6Jr<   )upperrM   r^   r   )r  UPLO_uppercases   ` r:   	checkUplor  v  s<    ZZ\N	LLD	QKNc1J^s5JJr<   eigenvalueseigenvectorsr  	compute_vc                 T   t        | d       t        |       t        | j                        }|r/| j	                  |      }|j                  |t        |d             n| j	                  dg      }|j                          | j	                  |t        | j                              }||fS )Nzlinalg.eighF	row_majorr   r   )
r   r  r   r   r   r  r    poprW   rU   )r  r  r  r   vecsvalss         r:   meta__linalg_eighr!  ~  s     a'dOME{{5! ;EU ST{{A3	IIK;;uOAGG$<;=D:r<   c                     t        | d       t        j                  | j                        r| j                  nt        j                  | j                        }| j                  | j                  d d |      S )Nzlinalg.eigvalsr   r   r   rE   r   rU   r   r   r   )r  complex_dtypes     r:   meta__linalg_eigvalsr%    sc     e-. !!%++. 	..u{{; 
 ??5;;s+=?AAr<   c                 0   t        | d       t        j                  | j                        r| j                  nt        j                  | j                        }| j                  | j                  d d |      }| j                  | j                  |      }||fS )Nz
linalg.eigr   r   r#  )r  r$  r   vectorss       r:   meta_linalg_eigr(    s     e\* !!%++. 	..u{{; 
 __U[["-]_CFooekko?G7?r<   r_  c                 v    | j                   j                  t        j                        j	                  dd      S )Nr   r  r   )mTr"  rM   r   	transpose)r_  s    r:   cloneBatchedColumnMajorr,    s*    66<<e&=&=<>HHRPPr<   r  c                     t        |       S r7   )r,  )r   r  r  s      r:   _cholesky_solve_helperr.    s     #4((r<   c                      t        j                   j                  dk\   fd       t        j                  j                  dk\  fd       t         d      \  }}t	        |||      S )Nr&  c                  $    d j                    dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadr   r   s   r:   r\   z cholesky_solve.<locals>.<lambda>  s    ?		{J]^r<   c                  $    d j                    dS )Nz-u should have at least 2 dimensions, but has r1  r2  r  s   r:   r\   z cholesky_solve.<locals>.<lambda>  s    ?xGZ[r<   cholesky_solve)rM   r^   r   !_linalg_broadcast_batch_dims_namer.  )r   r  r  self_broadcastedA_broadcasteds   ``   r:   r4  r4    sh     
LL		Q^ 
LL	![ 'Ha!'#m ""2M5IIr<   c                     | j                         dk(  r%t        j                  | t        j                        S t	        | d       t        |       S )Nr   r   cholesky)r   rM   r   legacy_contiguous_formatr   r,  r   r  s     r:   r9  r9    s@     zz|qE4R4RSSdJ'"4((r<   c                 0    t        | d       t        |       S )Ncholesky_inverse)r   r,  r;  s     r:   r=  r=    s     d./"4((r<   check_errorsc                    t        | d       t        | d       | j                  }t        |      }t	        |d      }| j                  |      }|j                  ||       | j                  |d|dz
   t        j                        }||fS )Nzlinalg.choleskyFr   r&  r   )	r   r   r   r   r    r   r  rM   rv  )r  r  r>  A_shaper   	L_stridesr  infoss           r:   linalg_cholesky_exrC    s    a*+1/0ggGw<D ,GU;I	GAMM'9% KKD1H-U[[KAEe8Or<   tauc                 @    t        j                   j                  dk\  d        t        j                   j                  d       j                  d      k\  d        t        j                   j                  d      j                  d      k\  d        t        j                   j                  j                  z
  dk(   fd        j                  dkD  r: j                  d d }j                  d d t        j                  |k(  fd	       t        j                  j
                   j
                  k(   fd
       t        d d       t        j                   j                  t         j                  d       j
                   j                        S )Nr&  c                       y)NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r@   r@   r<   r:   r\   z,linalg_householder_product.<locals>.<lambda>      Zr<   r  r   c                       y)Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r@   r@   r<   r:   r\   z,linalg_householder_product.<locals>.<lambda>  s    tr<   c                       y)Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r@   r@   r<   r:   r\   z,linalg_householder_product.<locals>.<lambda>  s    rr<   r   c                  <    dj                    d j                    S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to r2  r  rD  s   r:   r\   z,linalg_householder_product.<locals>.<lambda>  "    )),
2Nuzzl\r<   c                      d  S )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r@   actual_batch_tau_shapes   r:   r\   z,linalg_householder_product.<locals>.<lambda>      66L5MOr<   c                  <    dj                    d j                    S )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype r   rL  s   r:   r\   z,linalg_householder_product.<locals>.<lambda>  s    :399+*5;;-9r<   z torch.linalg.householder_productrD  Fr  r   r   rU   rz   )
rM   r^   r   r   r   rU   r  empty_stridedr    rz   )r  rD  expected_batch_tau_shaperP  s   `` @r:   linalg_householder_productrV    sK   
 
LL

aZ 
LL

2%**R.(t 
LL

2#((2,&r
 
LL

SXX"	
 zzA~#(;;s#3 !$3B"&>>	
 
LL		U[[ 	
 6UEJ[[*5;;%Hkk||	 r<   c                 2   t        | d       t        | dd       | j                  | j                        }|j	                  | j                  t        | j                  d             | j                  | j                  d d t        j                        }||fS )Nzlinalg.inv_exF)r  r  r  r   r   r   r   r   r  r    rM   rv  )r  r>  r  rB  s       r:   linalg_inv_ex_metarY    sq    a)1o%P	AGGAMM!''6qww%PQKKEKKK8Ee8Or<   LDpivotsinfo)	hermitianr>  r]  c                   t        | d       t        | d       t        j                  | j                  t        | j                  d      | j                  | j                        }| j                  | j                  d d t        j                        }| j                  | j                  d d t        j                        }|||fS )Nztorch.linalg.ldl_factor_exFr  rS  r   r   r  )
r   r   rM   rT  r   r    rU   rz   r   r  )r   r]  r>  rZ  r[  r\  s         r:   linalg_ldl_factor_ex_metar_  +  s     d894!=>			ZZ*4::Gjj{{	
B ^^DJJsO599^=F>>$**Sb/>;Dvtr<   )r]  c                d    t         d       t         d       t         d       t        j                  j
                  dk\  fd        j                  d d }t        j                  |j                  k(  fd       t        j                  t        j                  j                        fd       t        j                   j                  j                  k(   fd       t               \  }}t        j                  |t        |d	      j                  j                  
      S )Nztorch.linalg.ldl_solver&  c                  $    d j                    dS )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has r1  r2  )r  s   r:   r\   z'linalg_ldl_solve_meta.<locals>.<lambda>N      &&!46r<   r   c                  $    d j                    dS )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadr   r[  s   r:   r\   z'linalg_ldl_solve_meta.<locals>.<lambda>V      ))/h@r<   c                  "    d j                    S )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got r   re  s   r:   r\   z'linalg_ldl_solve_meta.<locals>.<lambda>]  s    Nv||n]r<   c                  <    dj                    d j                    S )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype r   )r  rZ  s   r:   r\   z'linalg_ldl_solve_meta.<locals>.<lambda>a  s     3BHH:=UVWV]V]U^_r<   Fr  rS  )r   r   r  rM   r^   r   r   rE   is_integer_dtyperU   _linalg_broadcast_batch_dimsrT  r    rz   )rZ  r[  r  r]  expected_pivots_shapeB_broadcast_sizerH   s   ```    r:   linalg_ldl_solve_metarm  @  s     b232781b":;	LL	!	
 HHSbM	LL-	
 
LLv||,] 
LL
AGG_ 7q"=a*+;uMggxx	 r<   Pr  )pivotro  c                h    t        j                   j                  dk\   fd       t         j                        }|d   }|d   }t        ||      }||d<   |r j                  |      }n j                  dg      }||d<    j                  |      }||d<   ||d<    j                  |      }|||fS )Nr&  c                  $    d j                    dS )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: rd  r   r  s   r:   r\   z linalg_lu_meta.<locals>.<lambda>q  s    RSTSZSZR[[cdr<   r  r   r   )rM   r^   r   r   r   r/  r   )	r  ro  sizesr  r>  r  rn  r  r  s	   `        r:   linalg_lu_metars  l  s     
LL	!d
 MEb	Ab	AAq	AE"IKKKKE"I	EAE"IE"I	EAa7Nr<   LU)ro  r>  c                    t        j                   j                  dk\   fd       t         j                        }|d   }|d   }t        j
                  |t        |d       j                   j                        }|j                          t        ||      |d<    j                  |t         j                        }|j                           j                  |t         j                        }|||fS )	Nr&  c                  $    d j                    dS )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: rd  r   r  s   r:   r\   z*linalg_lu_factor_ex_meta.<locals>.<lambda>  s    XYZY`Y`Xaaijr<   r  r   Fr  rS  r   )rM   r^   r   r   r   rT  r    rU   rz   r  r/  r   r  )	r  ro  r>  rr  r  r>  rt  r[  r\  s	   `        r:   linalg_lu_factor_ex_metarw    s     
LL	!j
 MEb	Ab	A			*5EBggxx	
B 
IIKAq	E"I[[eii[0F 
IIK;;uEII;.Dvtr<   )r  adjointrx  c                    t         d       t        j                   j                  j                  k(   fd       t        j                  j                  t        j                  k(  d        t         d       t         |d       t        j                   j                  d      j                  d      k(  d        t        j                   j                  d d j                  k(  fd       t               \  }}t        j                  |t        ||       j                  j                  	      }|j                         d
k7  r"|s |j                         r|j                         }|S )Nztorch.linalg.lu_solvec                  >    dj                    d j                    dS )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type rd  r   )r  rt  s   r:   r\   z&linalg_lu_solve_meta.<locals>.<lambda>  s#    $$&HH:_QWWIXOr<   c                       y)NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r@   r@   r<   r:   r\   z&linalg_lu_solve_meta.<locals>.<lambda>  s    Wr<   zlinalg.lu_solver   c                       y)NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr@   r@   r<   r:   r\   z&linalg_lu_solve_meta.<locals>.<lambda>  s    kr<   c                  $    d j                    dS )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape rd  r   re  s   r:   r\   z&linalg_lu_solve_meta.<locals>.<lambda>  rf  r<   r  rS  r   )r   rM   r^   rU   r  r   r	  r   r   rj  rT  r    rz   r   r!  conj)rt  r[  r  r  rx  rl  rH   r
  s   ```     r:   linalg_lu_solve_metar    s.    267	LL
AGG	
 
LL		!W b12b!T#45	LL
v{{2&k 
LL
"%	
 7q"=a  *+;4xPggxx	F ||~4[[]FMr<   unpack_dataunpack_pivotsc                     t        j                   j                  dk\   fd       |r2t        j                  |j                  t         j                  k(  d        t         j                        }|d   }|d   }t        ||      }||d<   |r j                  |      }n j                  dg      }|r2||d<    j                  |      }	||d<   ||d<    j                  |      }
n$ j                  dg      }	 j                  dg      }
||	|
fS )Nr&  c                  $    d j                    dS )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: rd  r   )rt  s   r:   r\   z lu_unpack_meta.<locals>.<lambda>  s    XY[YaYaXbbjkr<   c                       y)Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr@   r@   r<   r:   r\   z lu_unpack_meta.<locals>.<lambda>  s    pr<   r  r   r   )	rM   r^   r   rU   rv  r   r   r/  r   )rt  r[  r  r  rr  r  r>  r  rn  r  r  s   `          r:   lu_unpack_metar    s     
LL
1k LLEKK'	
 NEb	Ab	AAq	AE"ILLLL!b	LLb	b	LLLL!LL!a7Nr<   modec                       dk(  rd}d}||fS  dk(  rd}d}||fS  dk(  rd}d}||fS t        j                  d fd       fS )NreducedTcompleteFrc                      d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r@   )r  s   r:   r\   z _parse_qr_mode.<locals>.<lambda>  s    1$ 8N Or<   rM   r^   )r  	compute_qr  s   `  r:   _parse_qr_moder    s    y	 g 
		 g 
	 g 		
 gr<   QRc                    t        | d       t        | d       t        |      \  }}| j                  d   }| j                  d   }t	        ||      }|rMt        | j                        }|r|n||d<   | j                  |      }|j                  |t        |d             n| j                  dg      }t        | j                        }	|s|s|n||	d<   | j                  |	      }
|
j                  |	t        |	d             ||
fS )Nz	linalg.qrr  r   Fr  r   )	r  r   r  r   r/  r   r   r  r    )r  r  r  reduced_moder  r>  r  Q_shaper  R_shaper  s              r:   linalg_qr_metar  $  s     ![!1k*,T2I|	A	AAq	Aqww-'aQKK 	g:7eTUKK 177mG#9!!GBK	GAMM'6w%PQa4Kr<   sign	logabsdetc                    t        | d       t        | dd       | j                  }| j                  |d d       }| j                  |d d t	        | j
                              }t        j                  |t        |d      | j
                  | j                        }| j                  |d d t        j                        }||||fS )Nzlinalg.slogdetFr  r   rS  r   )r   r   r   r   rW   rU   rM   rT  r    rz   rv  )r  r   r  r  rt  r[  s         r:   _linalg_slogdetr  @  s     a)*1.6GGE;;uSbz"DE#2Joagg.FGI			*5%8ggxx	
B [[s5;;[7FB&&r<   full_matrices
compute_uvdriverc                 b   t        | d       t        | d       t        | j                  d d       }| j                  d   }| j                  d   }t	        ||      }|r|||r|n|gz   }| j                  |      }	|	j                  |t        |d             ||r|n||gz   }
| j                  |
      }t        |       dk(  }|j                  |
t        |
|             n$| j                  dg      }	| j                  dg      }| j                  ||gz   t        | j                              }|	||fS )	Nz
linalg.svdr  r   Fr  r*  r   r   )r  r   r   r   r/  r   r  r    r.  rW   rU   )r  r  r  r  r	  r  r>  r  U_shaper  V_shapeVis_cudaSs                 r:   _linalg_svd_metar  T  s#    !\"1l+aggcrl#J	A	AAq	A11==KK 	g:7eTU]1==KK 
 a.F*	g:7gVW KKKK 	
J!$OAGG,DEAa7Nr<   arg1arg2c                    | j                   d d }|j                   d d }t        ||      }t        |      }|| j                  d      | j                  d      gz  }t        |      }||j                  d      |j                  d      gz  }||fS )Nr  r   )r   r)   r   r   )r  r  arg1_batch_sizesarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizes          r:   rj  rj  z  s    
 zz#2zz#2,-=?OP012		"66012		"66---r<   c                     |rt        | ||       t        | |      \  }}|| j                  k(  r| n| j                  |      }||j                  k(  r|n|j                  |      }||fS r7   )r  rj  r   expand)r  r  r  r  r  arg1_broadcastedarg2_broadcasteds          r:   r5  r5    sv     tT40)EdD)Q&& !DJJ.DKK@P4Q  !DJJ.DKK@P4Q  ---r<   r   c                     | j                   d d }|j                  dk(  xs- | j                  dz
  |j                  k(  xr |j                   |k(  }|S )Nr   r   )r   r   )r  r   expected_batched_rhs_shapevector_cases       r:   linalg_solve_is_vector_rhsr    sS    !&Sb!1**/ 

Q%**$R8R)R  r<   )r  r>  r
  rt  r[  r\  c                    t         d       t        j                   j                  j                  k(   fd       t	               }|rj                  d      n}	t         |	|d       t        |	       \  }
}t        j                  |xs | d        |r|
d d n|
}t        j                  |t        ||       j                  j                        } j                  }t        j                  |t        |d       j                   j                        } j                  |d d t        j                        } j                  |d d t        j                        }||||f}||||f}t        d	 |D              rbt        ||      D ]S  \  }}t!        ||j                         |j#                  |j                  |j%                                t'        ||d
       U |S )Nzlinalg.solvec                  >    d j                    dj                    dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r{  rd  r   )r  r  s   r:   r\   z"_linalg_solve_ex.<locals>.<lambda>  s     Ywwiqwwix9r<   r   c                       y)Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r@   r@   r<   r:   r\   z"_linalg_solve_ex.<locals>.<lambda>  s    Kr<   rS  Fr   r  c              3   $   K   | ]  }|d u 
 y wr7   r@   ri   rJ   s     r:   rk   z#_linalg_solve_ex.<locals>.<genexpr>  s     
&#Q1D=#s   )	copy_fromcopy_toexact_dtype)r   rM   r^   rU   r  r   r	  rj  rT  r    rz   r   r   rv  allzipr%   r  r   r'   )r  r  r  r>  r
  rt  r[  r\  r  B_B_broad_shaperH   result_shaperesult_r   LU_pivots_info_r  resr  os   ``                    r:   _linalg_solve_exr    s    1n-	LL	177	
 -Q2K'RQBaT>23B:M1	LLK	
 *5="%-L!!*<TBggxx	G GGE


*5%8ggxx	C kk%*EKKk8GKKcr
%++K6E2vt
$CC%
(C

&#
&&SMDAqa)MM!''188:.QuE " Jr<   )r  unitriangularr  r  r  c                   || j                  dg      }t        |t              sJ t        | ||d       t	        || d       \  }}|j                  dd      j                         xr |j                         }|rt        ||j                        }|S t        ||j                        r=|j                  |j                  dd      j                         |j                  dd       |S )Nr   zlinalg.solve_triangularr  r   )r   rg   r#   r	  r5  r+  r   is_conjr%   r   r&   r  
transpose_)	r  r  r  r  r  r  r  A_avoid_copy_As	            r:   linalg_solve_triangular_metar    s     {kk1#c:&&&aD";<.q!T:FB<<B'557HBJJLLRXX. J  RXX.KKR,223NN2r"Jr<   XM)r  r+  c                     t        j                   j                  dk\   fd       t        j                  j                  dk\  fd       t         d       j                  t         j
                  k(  rt               \  }}t        j                  |t        |d       j                   j                        }t        j                  |t        |d      j                  j                        }||fS j                  t         j                  k(  sj                  t         j                  k(  r+t        j                         } j                  dg      }||fS t        j                  dd	        fS )
Nr&  c                  $    d j                    dS )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has r1  r2  r   s   r:   r\   z'triangular_solve_meta.<locals>.<lambda>  s    ))$79r<   c                  $    d j                    dS )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has r1  r2  r  s   r:   r\   z'triangular_solve_meta.<locals>.<lambda>  rb  r<   triangular_solveFr  rS  r   c                       y)Nz+triangular_solve: Got an unexpected layout.r@   r@   r<   r:   r\   z'triangular_solve_meta.<locals>.<lambda>(  s    $Qr<   )rM   r^   r   r  ry   stridedrj  rT  r    rU   rz   
sparse_csr
sparse_bsrr   r   )	r   r  r  r+  r  self_broadcast_sizeA_broadcast_sizesolutioncloned_coefficients	   ``       r:   triangular_solve_metar    sL    
LL		Q	
 
LL	!	
 4$67xx5== 0LTST0U--&&$./BeT**;;	
 #00!./?5Q''88	
 ''' 
U%%	%U5E5E)E##D)!^^QC0 ''' 	UQR'''r<   c                 l   t        | d       t        | d       | j                  | j                  d d       }| j                  | j                        }|j	                  | j                  t        | j                  d             | j                  | j                  d d t        j                        }|||fS )Nz
linalg.detr  Fr  r   r   rX  )r  detrt  r[  s       r:   _linalg_det_metar  -  s    a&1l+
++aggcrl
#C	
QWW	BNN17775QR[["U[[[9FF?r<   c                 0    t        j                   j                  dk\  d        t        j                  j                  dk\  d        |rdndt        j                  j                     j                  d   k\  fd       t        j                  j                      j                  d   k(  fd       t        j                  j                  d    j                  d   k  d        t        j                   j                  j                  z
  d	k(   fd
       t        j                   j                  j                  k(   fd        j                  dkD  re j                  d d }j                  d d t        j                  |k(  fd       j                  d d t        j                  |k(  fd       t        j                  j                   j                  k(   fd       t        j                  j                   j                  k(   fd       t        d d       t        d d       t        j                  j                  t        j                  d      j                  j                        S )Nr&  c                       y)Nz3torch.ormqr: input must have at least 2 dimensions.r@   r@   r<   r:   r\   zormqr.<locals>.<lambda>E      !Vr<   c                       y)Nz3torch.ormqr: other must have at least 2 dimensions.r@   r@   r<   r:   r\   zormqr.<locals>.<lambda>H  r  r<   r  r   c                      d  dS )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r@   left_size_conditions   r:   r\   zormqr.<locals>.<lambda>N  s    +,?+@@pqr<   c                      d  dS )Nr  z"] must be equal to input.shape[-2]r@   r  s   r:   r\   zormqr.<locals>.<lambda>R  s    +,?+@@bcr<   c                       y)NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r@   r@   r<   r:   r\   zormqr.<locals>.<lambda>W  rG  r<   r   c                  <    dj                    d j                    S )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to rK  r2  rL  s   r:   r\   zormqr.<locals>.<lambda>\  rM  r<   c                  <    dj                    d j                    S )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to rK  r2  r  r   s   r:   r\   zormqr.<locals>.<lambda>c  s&    ++0::,6RSXS]S]R^`r<   c                      d  S )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r@   rO  s   r:   r\   zormqr.<locals>.<lambda>n  rQ  r<   c                      d  S )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r@   )actual_batch_other_shapes   r:   r\   zormqr.<locals>.<lambda>w  s    66N5OQr<   c                  <    d j                    dj                    S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype r   rL  s   r:   r\   zormqr.<locals>.<lambda>  s"    ##(;;-/B399+Or<   c                  <    d j                    dj                    S )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype r   r  s   r:   r\   zormqr.<locals>.<lambda>  s"    ##(;;-/DU[[MSr<   ztorch.ormqrrD  r   Fr  rS  )	rM   r^   r   r   rU   r  rT  r    rz   )	r  rD  r   r  r+  expected_batch_shaper  rP  r  s	   ```   @@@r:   ormqrr  ;  s    
LL

aV 
LL

aV !%""	LL'(CIIbM9q 
LL'(EKKO;c
 
LL		"R(Z
 
LL

SXX"	
 
LL

ejj 	
 zzA~${{3B/!$3B"&::	
 $);;s#3 $(<<	
 
LL		U[[ 	
 
LLu{{"	
 M3u5M5%9[[*5;;%Hkk||	 r<   c                t    t        j                  t              dz  k(  fd        j                  }|dz   k(  }|}| }|r*t	        d|      D ]  }|xr  j                  |      dk7  } n)t	        d|      D ]  }|xr  j                  |      dk7  } t        j                  |xs | fd       y )Nr&  c                  ,    dd z   dt               S )Nzpadding size is expected to be r&  z, but got: r   )r}   paddings   r:   r\   z,_padding_check_valid_input.<locals>.<lambda>  s    1!c'+c'l^Tr<   r   r   c                  :    d dz    d dz    dj                    S )N	Expected r   zD or r&  zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: r   )r}   r  s   r:   r\   z,_padding_check_valid_input.<locals>.<lambda>  s-    aycAgY /AAFOr<   )rM   r^   r   r   r   r   )r  r  r}   	input_dimis_batch_modevalid_batch_modevalid_non_batch_moder  s   ```     r:   _padding_check_valid_inputr     s    	LLGCT
 

I#'*M$,,q)$A/FEJJqMQ4F % q)$A#7#NEJJqMQ<N  % 
LL00	
r<   c                   	
 d}dd} j                   dk(  r j                  d      }dz  |dz  }t         |d       |\  	
 j                  |      } j                        	z   
z   |r&t        j                  	k  xr 
k   	
fd       t        j                  dk\  fd        j                   dk(  r j                  |f      S  j                  ||f      S )Nr   r   r2   r   c                  4    d d d  dj                    S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (ro   ) at dimension 
 of input r   dim_wr  pad_lpad_rs   r:   r\   z_pad1d_common.<locals>.<lambda>  -    %%*G2eWOE7*UZU`U`Tacr<   c                      d  d S )Nz
input (W: z%) is too small. Calculated output W: r@   )input_woutput_ws   r:   r\   z_pad1d_common.<locals>.<lambda>  s    *WI%J8*Ur<   r&  )r   r   r   rM   r^   r   )r  r  is_reflection	dim_planenbatchnplaner  r  r  r  r	  s   `     @@@@@r:   _pad1d_commonr    s    IEFzzQA
Q	ug15LE5ZZ	"FjjG&HGO/	
 
LLAU
 zzQ1229::r<   c                     t        | |d      S NTr  )r  r  r  s     r:   meta_reflection_pad1dr         t<<r<   c                      t        j                   j                  t         j                  k7   fd       t	         |d      S )Nc                  @    d j                   j                          dS )Nz)"replication_pad1d" not implemented for ''rU   __str__r  s   r:   r\   z(meta_replication_pad1d.<locals>.<lambda>      =ekk>Q>Q>S=TTUXr<   Fr  )rM   r^   rU   boolr  r  s   ` r:   meta_replication_pad1dr!    5     
LLuzz!X u==r<   c                    d|s#t        j                  t        |      dk(  d        j                  dk(  rdz  |\  j	                        }|z   z   |r&t        j                  |k  xr |k  fd       t        j                   j	                        k(   fd       j                  j                        S )Nr   r&  c                       y)Nz padding size is expected to be 2r@   r@   r<   r:   r\   z(_pad1d_backward_common.<locals>.<lambda>  s    0Rr<   r2   c                  4    d d d  dj                    S r  r   r  s   r:   r\   z(_pad1d_backward_common.<locals>.<lambda>  r
  r<   c                  2    d dj                          S Nz(grad_output width unexpected. Expected: , Got: r   r  grad_outputr  s   r:   r\   z(_pad1d_backward_common.<locals>.<lambda>       :8*GKL\L\]bLcKder<   rM   r^   r   r   r   r   r   )	r*  r  r  r  r  r  r  r  r	  s	   ``   @@@@r:   _pad1d_backward_commonr-    s    ES\Q&(RSzzQ
LE5jjG&HGO/	
 
LLK$$U++e
 ??5;;''r<   
grad_inputc                      t        | ||d      S r  r-  r*  r  r  s      r:   meta_reflection_pad1d_backwardr2  
  s     "+ugTRRr<   c                      t        | ||d      S )NFr  r0  r1  s      r:   meta_replication_pad1d_backwardr4    s     "+ugUSSr<   c                   	
 ddd}d}t         |d        j                  }|dk(  r  j                  d      }dz  dz  |dz  }|\   j                  |      } j                        	 j                        
	z   z   
z   z   |rLt        j                  
k  xr 
k   fd       t        j                  	k  xr 	k   fd       t        j                  dk\  xs dk\  	
fd        j                  d	k(  r j                  |f      S  j                  ||f      S )
Nr&  r   r   r      c                  4    d d d  dj                    S r  r   r  s   r:   r\   z_pad2d_common.<locals>.<lambda>0  r
  r<   c                  4    d d d  dj                    S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (ro   r  r  r   dim_hr  pad_bpad_ts   r:   r\   z_pad2d_common.<locals>.<lambda>7  r
  r<   c                       d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r@   )input_hr  output_hr  s   r:   r\   z_pad2d_common.<locals>.<lambda>?  s%    	gY /$$,:T(=r<   r2   r   r   r   rM   r^   r   )r  r  r  
dim_slicesr  r   r  r;  r  r@  r  rA  r  r<  r  r	  r=  s   `      @@@@@@@@@@r:   _pad2d_commonrD    sU   EEJFug15::DqyA

a
!(E5%ZZ
#FjjGjjG&H&HGO/	
 	GO/	
 
LLA&Q	
 zzQ(;<<(CDDr<   c                     t        | |d      S r  )rD  r  s     r:   meta_reflection_pad2drF  K  r  r<   c                      t        j                   j                  t         j                  k7   fd       t	         |d      S )Nc                  @    d j                   j                          dS )Nz)"replication_pad2d" not implemented for 'r  r  r  s   r:   r\   z(meta_replication_pad2d.<locals>.<lambda>V  r  r<   Fr  )rM   r^   rU   r   rD  r  s   ` r:   meta_replication_pad2drI  Q  r"  r<   c                     ddd}|j                   }|j                         dk(  rdz  dz  |dz  }|\  }}}}|   }	|   }
|	|z   |z   |
|z   |z   t        j                   j	                        k(   fd       t        j                   j	                        k(   fd       |j                  |j                         S )Nr&  r   r   r6  c                  2    d dj                          S r'  r   r)  s   r:   r\   z%meta_pad2d_backward.<locals>.<lambda>x  r+  r<   c                  2    d dj                          S Nz)grad_output height unexpected. Expected: r(  r   r;  r*  rA  s   r:   r\   z%meta_pad2d_backward.<locals>.<lambda>|       ;H:W[M]M]^cMdLefr<   )r   r}   rM   r^   r   r   )r*  r   r  r  r[   r  r	  r=  r<  r@  r  r;  r  rA  r  s   `          @@@@r:   meta_pad2d_backwardrP  [  s     EEIJxxzQ

Q	!(E5%GG&H&H	LLK$$U++e 
LLK$$U++f >>$**%%r<   c          	      $   	
 d	ddd}t         |d        j                  dk(  }|r% j                  d      }	dz  	dz  dz  |dz  }|\   j                  |      } j                        
 j                         j                  	      
z   z   z   z   z   z   |rrt        j                  k  xr k  	 fd       t        j                  k  xr k   fd       t        j                  
k  xr 
k   fd	       t        j                  dk\  xs dk\  xs dk\  
fd
       |r j                  |f      S  j                  |f      S )Nr2   r&  r   r   r      c                  4    d d d  dj                    S r  r   r  s   r:   r\   z_pad3d_common.<locals>.<lambda>  r
  r<   c                  4    d d d  dj                    S r9  r   r:  s   r:   r\   z_pad3d_common.<locals>.<lambda>  r
  r<   c                  4    d d d  dj                    S )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (ro   r  r  r   )dim_dr  pad_bkpad_fs   r:   r\   z_pad3d_common.<locals>.<lambda>  s-    %%*G2fX_UG:V[VaVaUbdr<   c                  ,    d  d d d d d S )Nz
input (D:  H: r?  z%) is too small. Calculated output D: r@   )input_dr@  r  output_drA  r  s   r:   r\   z_pad3d_common.<locals>.<lambda>  s2    	gYd7) <$$,:T(4zKr<   rB  )r  r  r  r  
batch_moder  r  rV  r;  r  r[  r@  r  r\  rA  r  r<  rW  rX  r  r	  r=  s   `      @@@@@@@@@@@@@@@r:   _pad3d_commonr^    s   EEEIug15qJA


Q	07-E5%vZZ	"FjjGjjGjjG'H&H&HGO/	
 	GO/	
 	GO0 0	
 
LLA7Q7(a-	
 	
 (HMNN(HEFFr<   c                     t        | |d      S r  )r^  r  s     r:   meta_reflection_pad3dr`    r  r<   c                      t        j                   j                  t         j                  k7   fd       t	         |d      S )Nc                  @    d j                   j                          dS )Nz)"replication_pad3d" not implemented for 'r  r  r  s   r:   r\   z(meta_replication_pad3d.<locals>.<lambda>  r  r<   Fr  )rM   r^   rU   r   r^  r  s   ` r:   meta_replication_pad3drc    r"  r<   c                     t        j                  t        |      dk(  d        |j                  dkD  sJ  j                  |j                  k(  sJ ddd|j                  dk(  rdz  dz  dz  |\  }}}}}}|j	                        }	|j	                        }
|j	                        }|	|z   |z   |
|z   |z   ||z   |z   t        j                   j	                        k(   fd       t        j                   j	                        k(   fd       t        j                   j	                        k(   fd	       |j                  |j                        S )
N   c                       y)Nz padding size is expected to be 6r@   r@   r<   r:   r\   z%meta_pad3d_backward.<locals>.<lambda>  s    ,Nr<   r2   r&  r   rR  c                  2    d dj                          S r'  r   r)  s   r:   r\   z%meta_pad3d_backward.<locals>.<lambda>  r+  r<   c                  2    d dj                          S rM  r   rN  s   r:   r\   z%meta_pad3d_backward.<locals>.<lambda>  rO  r<   c                  2    d dj                          S )Nz(grad_output depth unexpected. Expected: r(  r   )rV  r*  r\  s   r:   r\   z%meta_pad3d_backward.<locals>.<lambda>  r+  r<   r,  )r*  r  r  r  r	  r=  r<  rX  rW  r[  r@  r  rV  r;  r  r\  rA  r  s   `           @@@@@@r:   meta_pad3d_backwardrj    s_    
LLW"$NO::>>uzz)))EEEzzQ


07-E5%vjjGjjGjjG'H&H&H	LLK$$U++e 
LLK$$U++f 
LLK$$U++e
 ??5;;''r<   pc                 J   t        j                  | j                         d        | j                  d      }|dk  r0| j	                  dg      j                  t         j                        S | j	                  ||dz
  z  dz  f      j                  t         j                        S )Nc                       y)Nz(_pdist_forward requires contiguous inputr@   r@   r<   r:   r\   z%meta__pdist_forward.<locals>.<lambda>	  s    &Pr<   r   r   r   r&  )rM   r^   r   r   r   r\  r:  )r   rk  r>  s      r:   meta__pdist_forwardrn   	  s     
LLP 			!AAv~~qc"%%E4R4R%SS~~qAE{a/125588 6 
 	
r<   gradpdistc                     t        j                  |j                         d        t        j                  |j                         d        t        j                  |t         j                        S )Nc                       y)Nz._pdist_backward requires self to be contiguousr@   r@   r<   r:   r\   z&meta__pdist_backward.<locals>.<lambda>	  s    &Vr<   c                       y)Nz/_pdist_backward requires pdist to be contiguousr@   r@   r<   r:   r\   z&meta__pdist_backward.<locals>.<lambda>	  s    'Xr<   r   )rM   r^   r   r   r:  )ro  r   rk  rp  s       r:   meta__pdist_backwardrt  	  sW     
LLV 
LLX D0N0NOOr<   )r  r  c          
      0    ddl m}m} j                  d      }j                  d      }j                  d      }	 |t	        j
                   | j                  |||	f                  r j                  |||	f       t	        j                  j                         dk(  d        t	        j                  j                         dk(  d        t        j                  sGt	        j                   j                  j                  cxk(  xr j                  k(  nc  fd       j                  }
j                  |
d   |
d   t	        j                  d   k(  xr d   k(  fd	        j                   j                               S )
Nr   )r   r   r   r&  r2   c                       yNzbatch1 must be a 3D tensorr@   r@   r<   r:   r\   zmeta_baddbmm.<locals>.<lambda>%	      ,Hr<   c                       yNzbatch2 must be a 3D tensorr@   r@   r<   r:   r\   zmeta_baddbmm.<locals>.<lambda>&	  rx  r<   c                  V    dj                    d j                    dj                    S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: r   )batch1batch2r   s   r:   r\   zmeta_baddbmm.<locals>.<lambda>*	  s.    A$**ZX^XdXdWeeopvp|p|o}~r<   c            	      .    d d d d    d d    d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [ro   z] but got: [r   r   ].r@   batch2_sizesbscontraction_sizes   r:   r\   zmeta_baddbmm.<locals>.<lambda>2	  s5    t2&'|LO3DB|TUFWWY[r<   )r   r   r   r   rM   sym_notr   r  r^   r}   
exp_config&skip_dtype_check_in_meta_registrationsrU   r   )r   r|  r}  r  r  r   r   dim1dim2dim3batch1_sizesr  r  r  s   ```        @@@r:   meta_baddbmmr  	  s7    L;;q>D;;q>D;;q>DU]]6$**tT46H#IJK{{D$-.	LL"$HI	LL"$HI<<JJ&,,6&,,6~	
 <<L<<L	aB#A	LLQ2E,q/5E"E	
 >>$))+&&r<   c                L    t        j                  | t         j                        S r   r  r   r;  s     r:   meta_bernoullir  :	  s     D0G0GHHr<   c                     | S r7   r@   r   rk  r;  s      r:   meta_bernoulli_r  A	      Kr<   c                 L    t        j                  | t         j                        S r   r  r  s      r:   meta_bernoulli_pr  F	  s     D0G0GHHr<   c                 ,    t        j                  |       S r7   rM   r   r  s     r:   meta_poissonr  L	       D!!r<   c                     t        j                  |
| j                         k  d        t        j                  | t         j                        }t        j                  |       |fS )Nc                       y)NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r@   r@   r<   r:   r\   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>d	      \r<   r   )rM   r^   r}   r   r   )r   observer_onfake_quant_onrunning_minrunning_maxscale
zero_pointaveraging_const	quant_min	quant_maxch_axisper_row_fake_quantsymmetric_quantmasks                 r:   $meta__fused_moving_avg_obs_fq_helperr  R	  sO      
LL$((*\ D

3DT"D))r<   c                 H   t        j                  | j                         dk(  d        t        j                  |j                         dk(  d        | j                  \  |j                  \  t        j                  k(  fd       | j	                        S )Nr&  c                       y)Nza must be 2Dr@   r@   r<   r:   r\   zmeta_mm.<locals>.<lambda>m	      ~r<   c                       y)Nzb must be 2Dr@   r@   r<   r:   r\   zmeta_mm.<locals>.<lambda>n	  r  r<   c            	      "    d d  d d d	S )Nz/a and b must have same reduction dim, but got [ro   z] X [r  r@   )M1M2Nrn  s   r:   r\   zmeta_mm.<locals>.<lambda>s	  s&    A!Brd%PRtSUVWUXXZ[r<   )rM   r^   r}   r   r   )r   br  r  r  rn  s     @@@@r:   meta_mmr  j	  sz     
LLA56	LLA56GGEArGGEB	LL
b[ ;;q!r<   c                      |r(t         fdt         j                        D              S t        j                   j
                        S )Nc              3   H   K   | ]  }|vrj                   |   nd   yw)r   Nr   )ri   r  dimsr   s     r:   rk   z+_compute_reduction_shape.<locals>.<genexpr>z	  s'     UDTqatmTZZ]:DTs   ")r]   r   r   rE   compute_reduction_output_shaper   )r   r  r  s   `` r:   r  r  x	  s7    UE$))DTUUU//

DAAr<   c                    t        | t        j                  j                        r| j                  j
                  S t        | d      rEt        | j                  d      r/| j                  j
                  dk7  r| j                  j
                  S y)Nrz   rq   rw   r*  )rg   rM   _subclasses
FakeTensorfake_devicerq   hasattrrz   )rf  s    r:   r.  r.  	  sg    &%++667!!&&&!FMM6*MM&(}}!!!r<   input_tensorr   r  dilationis_transposedgroupsoutput_paddingc                    dt         dt         dt         dt         dt         dt         fd}dt         dt         dt         dt         dt         dt         dt         fd	}	|j                  d
d  }
| j                  d
d  |r||j                  d   z  }n<|j                  d   }|j                  d   |z  | j                  d   k7  rt        d      | j                  d   |gt        |t              r|gt              z  }n t        |      dk(  r|d   gt              z  }t        |t              r|gt              z  }n t        |      dk(  r|d   gt              z  }t        |t              r|gt              z  }n t        |      dk(  r|d   gt              z  }d }|rCt        |t              r|gt              z  }n#t        |      dk(  r|d   gt              z  }n|}t        t                    D ]]  }|r/j                   |	|   ||   ||   |
|   ||   ||                4j                   ||   ||   ||   |
|   ||                _ t        j                  t        d d
d  D              fd       S )Nlnrk  r  r  r   r3   c                 6    | d|z  z   ||dz
  z  z
  dz
  |z  dz   S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r&  r   r@   )r  rk  r  r  r   s        r:   _formulaz+calc_conv_nd_return_shape.<locals>._formula	  s.     QU
Q!a%[(1,2Q66r<   r8   c                 <    | dz
  |z  d|z  z
  ||dz
  z  z   |z   dz   S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        r   r&  r@   )r  rk  r  r  r   r8   s         r:   _formula_transposedz6calc_conv_nd_return_shape.<locals>._formula_transposed	  s2    " Q!|a!e#a1q5k1B6::r<   r&  r   r   zInvalid channel dimensionsc              3   &   K   | ]	  }|d kD    ywr   Nr@   r  s     r:   rk   z,calc_conv_nd_return_shape.<locals>.<genexpr>	  s     )=aAE=   c                  .    dt                ddd   dS )NzGiven input size per channel: z&. Calculated output size per channel: r&  z. Output size is too small)r   )r  	ret_shapes   r:   r\   z+calc_conv_nd_return_shape.<locals>.<lambda>	  s(    0d =//8}o >#$r<   )r  r   r[  rg   r   r   r   r   rM   r^   r~   )r  rn  r   r  r  r  r  r  r  r  kernel_sizeout_channelsoutput_padding_listr  r  r  s                 @@r:   calc_conv_nd_return_shaper  	  s   7S 7S 7S 7S 7S 7S 7"; ; ; ; ; ; ;QT ;& ,,qr"Kab!DQ/||A<<?V#|'9'9!'<<;<<##A&5I&'"CI%	V	)s4y('7#)c$i'	W	1:,T*(G$:D	)	X!	QK=3t9,/3ng.#1"2SY"> A%#1!#4"5D	"A"03t9#GAJQKN1I'*	 a'!*hqk;q>6RS9U $ 
LL)9QR=))	$ r<   c                 b    t         j                  j                  |       t         j                  k(  S r7   rM   _prims_commonr"   channels_lasttens    r:   is_channels_lastr  	  s$    44S9U=P=PPPr<   running_meanrunning_vartrainingexponential_average_factorepsilonc                 r     j                   }||j                   n|j                   }	||j                   n|j                   }
 fd} j                  |      j                   |             }|r# j                  |	      } j                  |
      }n" j                  d      } j                  d      }|||fS )Nc                      t               rt        j                  S  j                  t        j                        rt        j                  S t        j                  S r   )r  rM   r  r   r   )r  s   r:   pick_memory_formatz2meta_miopen_batch_norm.<locals>.pick_memory_format
  sF    L)&&&%%E4K4K%L***&&&r<   r   r   )r   r   r\  )r  rn  rp  r  r  r  r  r  r   save_mean_shapesave_var_shaper  r  	save_meansave_vars   `              r:   meta_miopen_batch_normr   
  s     ""I -9,Dl((&,,O*5*A[&&v||N' 
 
 
+
.
.=O=Q
.
RC **?;	)).9 **40	))$/	8##r<   c	           
            fd}	t         ||||||r|nd       }
d}d} j                  |      dk(  rd|
|<    j                  |
      }|j                   |	             }|S )Nc                  d   t               dk(  r&t               st              r+t        j                  S t               rt        j                  S  j	                  t        j
                        rt        j
                  S  j	                  t        j                        rt        j                  S y Nr*  r   )r.  r  rM   r  r   r   preserve_format)r  rn  s   r:   r  z%meta_conv.<locals>.pick_memory_format2
  s    |$.-1A&1I***-***%%E4K4K%L***''e6K6K'L((( Mr<   r   r   r   )r  r   r   r\  )r  rn  rp  r   r  r  r  r  r  r  	shape_outinput_channels_dimoutput_channels_dimr  s   ``            r:   	meta_convr  &
  s    
) *'T	I +,1)*	%&

 
 
+C
&&13&
4CJr<   mkldnnc
           
          t        | ||||d|g       }
| j                  |
      }t        j                  }| j	                         dk(  rt        j
                  }|j                  |      }|S )NFrR  r   )r  r   rM   r  r}   channels_last_3dr\  )r  rn  rp  r  r   r  r  attrscalars	algorithmr  r  out_memory_formats                r:   meta_mkldnn_convolution_defaultr  X
  sp     .&&'8UFB
	 $$Y/!//" % 6 6ff#4f5
r<   c                 b    | j                  g | j                  d d |j                  d         S Nr   r   r   r   )r  rn  rp  r  r  r  s         r:   meta_linear_pointwise_defaultr  o
  s5     %%&Q(:(:3B(?&Qa&QRRr<   mklc                 b    | j                  g | j                  d d |j                  d         S r  r  )r  packed_weightorig_weightrp  r  s        r:   meta_mkl_linearr  z
  s:    ))@,$$Sb)@;+<+<Q+?@ r<   onednnc           
      |   t        | ||||	d|
d       }|t        j                  t        j                  t        j                  t        j
                  fv sJ | j                  ||      }t        |      dv sJ d       t        |      dk(  rt        j                  nt        j                  }|j                  |      }|S )NFr   r2   r6  zonly conv1d/2d are supportedr6  r   )r  rM   r  r  uint8ru  r   r   r  r   r\  )rJ   x_scalex_zpww_scalew_zprp  r   r  r  r  output_scaleoutput_zero_pointoutput_dtyper  r  r  r  r  formats                       r:   meta_qconv_pointwiser  
  s    * .	
	 u~~u{{EJJWWWWkk)<k89~'G)GG'(+I!(;$$AXAXff6f*
r<   c                     |dk(  sJ |S )Nsumr@   )rJ   r  r  r  r  r  accumrp  r   r  r  r  r	  r
  r  accum_scaleaccum_zero_pointbinary_op_namer  unary_op_nameunary_op_argsunary_op_algorithms                         r:   meta_qconv2d_pointwise_binaryr  
  s    2 &&&r<   c                     t        | j                        }|j                  d   |d<   |	t        j                  t        j                  t        j
                  t        j                  fv sJ | j                  ||	      }|S )Nr   r   r   )r   r   rM   r  r  ru  r  r   )rJ   r  r  r  r  r  rp  r	  r
  r  post_op_namepost_op_argspost_op_algorithmr  r  s                  r:   meta_qlinear_pointwiser  
  sa    " AGG}771:Ru~~uzz5;;WWWWkk,lk;
r<   c                    |dk(  r|S t        | j                        }|j                  d   |d<   |
t        j                  t        j                  t        j
                  t        j                  fv sJ | j                  ||
      }|S )Nr  r   r   r   )r   r   rM   r  r  r  ru  r   )rJ   r  r  r  r  r  x_2rp  r	  r
  r  x2_scalex2_zpr  r  r  r  r  r  r  s                       r:   meta_qlinear_pointwise_binaryr!  
  sn    , U"JAGG}771:Ru~~u{{EJJWWWWkk,lk;
r<   c                 v    t        | j                        }|j                  d   |d<   | j                  |      }|S )Nr   r   )r   r   r   )rJ   r  rp  r  r  s        r:   meta_linear_dynamic_fp16r#  
  s6     AGG}771:Rkk,'
r<   	quantizedr  r   c                 .   t        | |||||      \  }}}| j                         dk(  r| j                  d      nd}	t        j                  }
| j                         dk(  r|||g}n|	|||g}t        j
                  || j                  | j                  |
      S Nr6  r   r2   r  )#max_pool2d_checks_and_compute_shaper}   r   rM   r  r   rU   rz   r  r  r   r  r  	ceil_modenInputPlaneoutputHeightoutputWidthr  r   r   s               r:   meta_quantized_max_pool2dr/    s     0;9
		
 $)99;!#3B++99;!{;DK{CD{{++<<'	
 	
r<   c                    t        j                  | j                         dk(  d| j                          d       t        j                  |j                         dk(  d|j                          d       t        j                  | j                  t         j                  t         j
                  t         j                  fv d| j                          t        j                  |j                  t         j                  k(  d|j                          t        j                  |j                  t         j                  k(  d|j                          t        j                  |j                  | j                  k(  d|j                          | j                  | j                  d	      |j                  d	      | j                  
      S )Nr&  zx must be a 2D tensor, got Dzw must be a 2D tensor, got #expected x to be f32/f16/bf16, got expected w to be uint8, got z q_group_size must be int64, got z5q_scale_and_zeros must have the same dtype as x, got r   r   )rM   r^   r}   rU   r  r  r  r  r   r   r   rJ   r  q_group_sizeq_scale_and_zeross       r:   meta_int4mm_packed_weight_cpur7  +  s@   QUUW\%@	#KLQUUW\%@	#KLGGu}}ennEE1!'';	
 	QWW+/KAGG9-UV%++-.|/A/A.BC	
 	##qww.CDUD[D[C\]	
 {{166!9affQiqww{??r<   c                      t        j                   j                         k(  xr  j                     k(   fd       y )Nc                  j    d  d d ddj                          d dj                      z   S )NzExpected a tensor of dimension z and tensor.size[z] == ro   zbut got : dimension z] = r}   r   )r}   dim_sizer   rf  s   r:   r\   z check_dim_size.<locals>.<lambda>C  sN    1#6GzQVW[V\\^_ .?zfll[cNdMe
fgr<   )rM   r^   r}   r   )rf  r}   r;  r   s   ````r:   check_dim_sizer<  @  s6    	LL

>X 6$ >	gr<   c                     d } |d|      \  }}	t        j                  t        |      dv d        t        j                   j                  t         j                  t         j
                  t         j                  t         j                  fv fd       t        |      dk(  r||	}}
n%t        |      dk(  r|d   |d   }}
n |d|      \  }
} |d	|      \  }}t        j                  |d u xs |dk7  d
         j                         dk(  r j                  d      nd} j                  d      } j                  d      } j                  d      }t        ||||
d|      }t        ||	||d|      }t        j                         }t         ||	|
|||dd||||||        j                         dk(  r|||g}n||||g}t        j                  | j                   j                  |      S )Nc                      t        j                  t        |      dv  fd       |d   }t        |      dk(  r|n|d   }||fS )Nr   r&  c                      d  dS )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr@   r  s   r:   r\   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>U      l4&(\]r<   r   r   rM   r^   r   r  r  HWs   `   r:   unpackzmeta_avg_pool2d.<locals>.unpackR  G    H]	
 FSQACF!tr<   r  r   r   r&  c                       yNzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr@   r@   r<   r:   r\   z!meta_avg_pool2d.<locals>.<lambda>^      ar<   c                  @    d j                   j                          dS )Nz""avg_pool2d" not implemented for 'r  r  r  s   r:   r\   z!meta_avg_pool2d.<locals>.<lambda>b      6u{{7J7J7L6MQQr<   r   r   r   r  c                       yNzdivisor must be not zeror@   r@   r<   r:   r\   z!meta_avg_pool2d.<locals>.<lambda>o      *r<   r6  r(  r  r   r2   r  )rM   r^   r   rU   r  uint16uint32uint64r}   r   pooling_output_shaperE   r"   pool2d_shape_checkr   rz   )r  r  r   r  r+  count_include_paddivisor_overriderH  kHkWdHdWpadHpadWr  r,  inputHeight
inputWidthr-  r.  r   r   s   `                     r:   meta_avg_pool2drc  H  s    M;/FB	LLFy a 
LLEKKu||U\\RRQ 6{aRB	V	F1IB&)B	7+JD$	LLD 9$4$9*
  %yy{a/UZZ^QF**R.K**R.KBJ'Rr1iPL&z2tRINK//6M



		$ yy{a\;7\;?;;kk||#	 r<   c                     t        | ||||||dd|	|
||||       | j                         }|	}t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       y )Nr   r2   r&  )rX  r}   r<  )r  
gradOutputr  r[  r\  r]  r^  r_  r`  r,  ra  rb  r-  r.  
mem_formatr   nOutputPlanes                    r:   avg_pool2d_backward_shape_checkrh    s    " 



		$ 99;DL:tTAX|<:tTAX|<:tTAX{;r<   c                    t        j                  t        |      dk(  xs t        |      dk(  d        |d   }t        |      dk(  r|n|d   }	t        j                  t        |      dk(  xs t        |      dk(  xs t        |      dk(  d        t        |      dk(  r|n|d   }
t        |      dk(  r|	nt        |      dk(  r|
n|d   }t        j                  t        |      dk(  xs t        |      dk(  d        |d   }t        |      dk(  r|n|d   }t        j                  |d u xs |dk7  d        |j                  }|j	                         dk(  r|d	   nd}|d
   }|d   }|d   }t        ||||
d|      }t        ||	||d|      }t        j                  |      }t        || |||	|
|||||||||       t        j                  ||j                  |j                  |      S )Nr   r&  c                       y)NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr@   r@   r<   r:   r\   z*meta_avg_pool2d_backward.<locals>.<lambda>  s    ]r<   r   c                       yrL  r@   r@   r<   r:   r\   z*meta_avg_pool2d_backward.<locals>.<lambda>  rM  r<   c                       y)NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr@   r@   r<   r:   r\   z*meta_avg_pool2d_backward.<locals>.<lambda>  s    Yr<   c                       yrQ  r@   r@   r<   r:   r\   z*meta_avg_pool2d_backward.<locals>.<lambda>  rR  r<   r6  r(  rS  r  r   r  )rM   r^   r   r   r}   rW  rE   r"   rh  r   rU   rz   )gradOutput_r  r  r   r  r+  rY  rZ  r[  r\  r]  r^  r_  r`  
input_sizer  r,  ra  rb  r-  r.  rf  s                         r:   meta_avg_pool2d_backwardrp    s    
LLKA6[!1Q!6] 
QB;1$+a.B	LLFq@CK1,@Fq0@a 6{aVAYB6{a3v;!+;RB	LLG.S\Q.Y 1:Dw<1$4'!*D	LLD 9$4$9*
 J$yy{a/Z^QFR.KR.KBJ'Rr1iPL&z2tRINK,,U3J#



$ ;;kk|| 	 r<   c                     t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }	t        j                  | xs t        |      dv d        t        j                   j                  t         j                  t         j
                  t         j                  t         j                  fv fd       |s|n|d   }
|s|nt        |      dk(  r|
n|d   }|s|	nt        |      dk(  r|
n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                   j                  d	v d
        t        j                  | xs |dk7  d         j                  d      } j                  d      } j                  d      } j                  d      } j                  d      }t        ||||
d|      }t        ||||d|      }t        ||	||d|      }t         ||||	|
|||||ddd||||||dd        j                  dk(  r j                  ||||f      S  j                  |||||f      S )Nr   r2   c                       yNzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr@   r@   r<   r:   r\   z!meta_avg_pool3d.<locals>.<lambda>      Xr<   r   r   r&  c                       yNzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr@   r@   r<   r:   r\   z!meta_avg_pool3d.<locals>.<lambda>$  r  r<   c                  @    d j                   j                          dS )Nz""avg_pool3d" not implemented for 'r  r  r  s   r:   r\   z!meta_avg_pool3d.<locals>.<lambda>(  rO  r<   c                       yNzBavg_pool3d: padding must be a single int, or a tuple of three intsr@   r@   r<   r:   r\   z!meta_avg_pool3d.<locals>.<lambda>0      Tr<   r6  rR  c                       yNz9non-empty 4D or 5D (batch mode) tensor expected for inputr@   r@   r<   r:   r\   z!meta_avg_pool3d.<locals>.<lambda>8      Kr<   c                       yrQ  r@   r@   r<   r:   r\   z!meta_avg_pool3d.<locals>.<lambda>=  rR  r<   r(  rS  r  r   zavg_pool3d()T)check_input_sizer6  )rM   r^   r   rU   r  rT  rU  rV  r   r   rW  pool3d_shape_checkr   )r  r  r   r  r+  rY  rZ  kTr[  r\  dTr]  r^  padTr_  r`  r  nslicesitimeiheightiwidthotimeoheightowidths   `                       r:   meta_avg_pool3dr    s    
LLKF"X 
QB;1$+a.B;1$+a.B	LL
+c&kV+\ 
LLEKKu||U\\RRQ vayBc&kQ&6F1IBc&kQ&6F1IB	LLGT 1:Dw<1$4'!*Dw<1$4'!*D	LL

fK
 
LL5 0A 5*
 ZZ]FjjnGJJrNEjjnGZZ^F D"aCE"7Bb!YGG!&"dB9EF





			-2 zzQ@AAHIIr<   c                    t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }	t        |      dk(  r|n|d   }
t        j                  | xs t        |      dv d        |s|n|d   }|s|	nt        |      dk(  r|n|d   }|s|
nt        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  |j                  dv d	        t        j                  | xs |dk7  d
        |j	                  d      }|j	                  d      }|j	                  d      }|j	                  d      }t        ||||d|      }t        ||	||d|      }t        ||
||d|      }t        || |||	|
||||||||||||d       |j                  |j                        S )Nrr  c                       yrt  r@   r@   r<   r:   r\   z*meta_avg_pool3d_backward.<locals>.<lambda>w  ru  r<   r   r   r&  c                       yrw  r@   r@   r<   r:   r\   z*meta_avg_pool3d_backward.<locals>.<lambda>  r  r<   c                       yrz  r@   r@   r<   r:   r\   z*meta_avg_pool3d_backward.<locals>.<lambda>  r{  r<   r|  c                       yr~  r@   r@   r<   r:   r\   z*meta_avg_pool3d_backward.<locals>.<lambda>  r  r<   c                       yrQ  r@   r@   r<   r:   r\   z*meta_avg_pool3d_backward.<locals>.<lambda>  rR  r<   r(  rS  r  r   zavg_pool3d_backward())	rM   r^   r   r   r   rW  avg_pool3d_backward_shape_checkr   r   )r*  r  r  r   r  r+  rY  rZ  r  r[  r\  r  r]  r^  r  r_  r`  r  r  r  r  otime_for_shape_checkoheight_for_shape_checkowidth_for_shape_checks                           r:   meta_avg_pool3d_backwardr  i  s    
LLKF"X 
QB;1$+a.B;1$+a.B	LL
+c&kV+\ vayBc&kQ&6F1IBc&kQ&6F1IB	LLGT 1:Dw<1$4'!*Dw<1$4'!*D	LL

fK
 
LL5 0A 5*
 jjnGJJrNEjjnGZZ^F0D"aS27Bb!YW1&"dB9U#





', ??5;;''r<   c                 ,    t        j                   j                  dk(  xs  j                  dk(   fd        j                  d d t	        |      z   }t        j                         }t        j                  | j                   j                  |      S )Nr2   r6  c                  "    d j                    S )Nz"Expected 3D or 4D tensor, but got r   r   s   r:   r\   z*meta_adaptive_avg_pool2d.<locals>.<lambda>      4TZZLAr<   r  r  )
rM   r^   r   r   r]   rE   r"   r   rU   rz   )r   output_sizer  r   s   `   r:   meta_adaptive_avg_pool2dr    s|    	LL		Q($))q.A ::cr?U;%77L//5M ;;jj{{#	 r<   c                      t        j                   j                  dk(  xs  j                  dk(   fd        j                   j                  d d t        |      z         S )Nr6  rR  c                  "    d j                    S )Nz"Expected 4D or 5D tensor, but got r   r   s   r:   r\   z*meta_adaptive_avg_pool3d.<locals>.<lambda>  r  r<   rS  )rM   r^   r   r   r   r]   )r   r  s   ` r:   meta_adaptive_avg_pool3dr    sO    	LL		Q($))q.A >>$**Sb/E+,>>??r<   c                      j                   }t        d|      D ].  t        j                   j	                        dkD   fd       0 t        j                  |dk(  xs |dk(  fd       t        j                  j
                   j
                  k(   fd       t        j                  }t              rt        j                  }j                  j                        j                  |      S )	Nr   r   c                  *    d j                    d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyr   )grad_outr  s   r:   r\   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>  s$     66>nn5EEUVWUXXdfr<   r2   r6  c                  "    d j                    S )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got r   r   s   r:   r\   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>  s    TUYU_U_T`ar<   c                  <    dj                    d j                    S Nexpected dtype z! for `grad_output` but got dtype r   )r  r   s   r:   r\   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>  s    /$**-Nx~~N^_r<   r   )r   r   rM   r^   r   rU   r   r  r  r   r   r\  )r  r   r   r   r  s   ``  @r:   "meta__adaptive_avg_pool2d_backwardr    s    ==D1d^MM!q f	
  
LL	TQYa 
LL

hnn$_ ++M++>>$**%((}(EEr<   c                 d    t        | d       t        j                  |t        j                        S )Nadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkrM   r   r:  r*  r   s     r:   "meta__adaptive_avg_pool3d_backwardr    s(     &k3QRD0N0NOOr<   r*  c                       j                   }t        d|      D ]/  t        j                   j	                        dkD   fd       1 y )Nr   r   c                  .      dj                    d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r  r  r   )r  r*  r  s   r:   r\   z3_adaptive_pool_empty_output_check.<locals>.<lambda>  s*    * --8->->,??OPQsR^`r<   )r   r   rM   r^   r   )r*  r  r   r  s   `` @r:   r  r    sB    D1d^Q!#	
 r<   c                      j                   }t        j                  |dv  fd       t        d|      D ].  t        j                   j	                        dkD   fd       0 t        j                  t        |      dk(  d        d}d}d} j                   dk(  r j	                  d      }|dz  } j	                  |dz
        }|\  }} j                   d	k(  r;|||f} j                  |      }	 j                  |t        j                  
      }
|	|
fS ||||f}t        j                         } j                  |      j                  |      }	 j                  |t        j                  
      j                  |      }
|	|
fS )Nr  c                  "    d j                    S )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: r   r  s   r:   r\   z*meta_adaptive_max_pool2d.<locals>.<lambda>      LU[[MZr<   r   r   c                  *    dj                    d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r   r  r  s   r:   r\   z*meta_adaptive_max_pool2d.<locals>.<lambda>	       '',{{m3CA3lTr<   r&  c                       y)NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r@   r@   r<   r:   r\   z*meta_adaptive_max_pool2d.<locals>.<lambda>      Ur<   r6  r2   r   r   )r   rM   r^   r   r   r   r   r   rE   r"   r\  )r  r  r   dimHsizeBsizeDosizeHosizeWr   r  r   r   r  s   `           @r:   meta_adaptive_max_pool2dr    sw    ::D	LLZ 1d^JJqMA	
  
LLKAU
 DEEzzQ

1	JJtax E NFFzzQFF+	ooi(//)5;;/?G|E662	33E:ooi(++-+H//)5;;/?BB' C 
 G|r<   c                 N     j                   }t        j                  |dv  fd       t         d       t        j                  j                   j                  k(   fd       t        j                        }j                  j                        j                  |      S )Nr  c                  "    d j                    S )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: r   r*  s   r:   r\   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>4  s    ]^i^o^o]pqr<   adaptive_max_pool2d_backwardc                  <    dj                    d j                    S r  r   )r*  r  s   r:   r\   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>;  s    /%++.OP[PaPaObcr<   r   )
r   rM   r^   r  rU   rE   r"   r   r   r\  )r*  r  r   r   r   s   ``   r:   !meta_adaptive_max_pool2d_backwardr  .  s     D	LLq
 &k3QR	LL{(((c
 //6M??5;;'***GGr<   c                      j                   }t        j                  |dv  fd       t        d|      D ].  t        j                   j	                        dkD   fd       0 t        j                  t        |      dk(  d        d}d}d}|dk(  r j	                  d      }|dz  } j	                  |      }|\  }}}|d	k(  r||||f}	n|||||f}	 j                  |	      }
 j                  |	t        j                  
      }|
|fS )Nr|  c                  "    d j                    S )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: r   r  s   r:   r\   z*meta_adaptive_max_pool3d.<locals>.<lambda>H  r  r<   r   r   c                  *    dj                    d  dS )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r   r  s   r:   r\   z*meta_adaptive_max_pool3d.<locals>.<lambda>M  r  r<   r2   c                       y)NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r@   r@   r<   r:   r\   z*meta_adaptive_max_pool3d.<locals>.<lambda>U  r  r<   rR  r6  r   )r   rM   r^   r   r   r   r   r   )r  r  r   dimDr  r  osizeTr  r  r   r  r   r  s   `           @r:   meta_adaptive_max_pool3dr  B  s    ::D	LLZ 1d^JJqMA	
  
LLKAU
 DEEqy

1	JJtE(FFFqyFFF3	E666:	
//)
$Cooiu{{o;G<r<   c                 P    t        | d       |j                  |j                        S )Nadaptive_max_pool3d_backward)r  r   r   )r*  r  r   s      r:   !meta_adaptive_max_pool3d_backwardr  n  s"     &k3QR??5;;''r<   c                 >    |t        d      | j                  |      S )Nz:cannot repeat_interleave a meta tensor without output_size)r[  r   )repeatsr  s     r:   meta_repeat_interleave_Tensorr  u  s%    WXX[))r<   c                     | j                   j                  sJ |j                   j                  sJ t        | j                  |j                        }| j	                  |t        | j                               S r  )rU   r-  r)   r   r   r   )realimagr   s      r:   meta_complexr  |  s[     ::''''::''''!$**djj9I>>)+Ftzz+R>SSr<   )
fill_valuer  c                d    | j                  || j                         ft        j                        S r  )r   r}   rM   r   )r   r   r  s      r:   nonzero_staticr    s&     >>4,EJJ>??r<   c                 
   t        j                  t        j                  d        t        j                  | j                         | j                         fd| j                         ft         j                  | j                        S )Nc                       y)NaY  The register_meta function for torch.nonzero() raises unimplemented by default, as a correct data-independent implementation does not exist. This implementation returns a fake value, assuming all elements of the tensor are non-zero. To enable this registration, please set 'torch.fx.experimental._config.meta_nonzero_assume_all_nonzero' to True.r@   r@   r<   r:   r\   znonzero.<locals>.<lambda>  s     Sr<   r   rU   rz   )	rM   _check_not_implementedr  meta_nonzero_assume_all_nonzerorT  r   r}   r   rz   r   s    r:   nonzeror    sf     
  22	S 	txxz"	
DJJLjj{{	 r<   c           
          t        j                  t              d        g }t              D ]  \  ft        j                  j                  t         j
                  t         j                  t         j                  t         j                  fv d        j                  t         j                  t         j                  fv rȉj                         }t        |      t        j                  j                  z    j                  k   fd       t        j                        D ]`  t        j                  j                      j                  z      k(   fd       |j                  |j                  d             b ]|j                         p|j                          |t        j                  t               j                  k   fd       dd lm} t%         |j&                         t               j                  k  r*j                  d        t               j                  k  r*d}d}D ]  |dk(  rd}|dk(  rd	} n d
}|sg }g }t              D ]*  \  	|j                         |j                         , t              D ]*  \  	|j                         |j                         ,  j)                  |       |g g g t              D ]\  \  }	@rj                   j                  |	          )j                   j                  |	          Ht%        j                        ^ fd}
 j+                  z   z         }ddlm}  | j1                         dk(        r|S  |
       }t3        j4                  |      }t%        |      t%        t        t        |                  k7  r~t3        j6                  |j                  |      }t3        j8                  |      }t3        j6                  |t3        j:                  |            }|j=                  |j?                         |      }|S )Nc                       y)Nz#at least one index must be providedr@   r@   r<   r:   r\   z#meta_index_Tensor.<locals>.<lambda>  s    (Mr<   c                       y)Nz?tensors used as indices must be long, int, byte or bool tensorsr@   r@   r<   r:   r\   z#meta_index_Tensor.<locals>.<lambda>  s    Yr<   c                  "    d j                    S )N)too many indices for tensor of dimension r2  r   s   r:   r\   z#meta_index_Tensor.<locals>.<lambda>  s    G		{Sr<   c            	      N    dj                    d  dj                    dz    S )NzThe shape of the mask 
 at index z0 does not match the shape of the indexed tensor r   )r  r   jr  r   s   r:   r\   z#meta_index_Tensor.<locals>.<lambda>  s:    "8ZPQs SJJN**U_`ade`e_f!hr<   r   c                  <    dj                    dt                dS )Nr  z (got rp   )r   r   )r   r   s   r:   r\   z#meta_index_Tensor.<locals>.<lambda>  s    ;DII;fSQX\NZ[\r<   r   Fr&  Tc                     z   z   }t        | j                               }dgt              z  |t              t        | j                        t              z
   | j	                  ||      S )zI
        This follows restride_src in TensorAdvancedIndexing.cpp
        r   )r   r   r   r   r   )r   r   r   after_shapebefore_shapereplacement_shapes      r:   _restride_srcz(meta_index_Tensor.<locals>._restride_src   so     00;>t{{}%KL#PSQ
 K
L!C

Oc+6F$FG ug..r<   guard_size_oblivious) rM   r^   r   	enumeraterU   r   r  ru  r  r   r   r   r   r   r   selecttorch._refsr   r   r*   r   r   r   r  r   rE   3compute_elementwise_output_logical_to_physical_perm
apply_permr    invert_permr   r   )r   r   r
  r  refsstatehas_contiguous_subspacer  transposed_indicesr}   r  r  r  restrided_selfperm
perm_shaper   r  r  r  r   r  r  r  s   ``               @@@@@@@r:   meta_index_Tensorr    s   	LLg MN &(Fg&5LL

EIIuzz5::NNY {{uzz5::66--/K""

Ndii/S uzz*A&&A$**QU*;;h
 MM'..A"67 + e$MM% / '0 G	LLG		!\
 (4(('23G
g,
"t g,
" E#A: aZ}   #'
 #!'*HAu A"))%0 + "'*HAu}A"))%0 + ||D!$ !LK#%(
U= ""4::c?3##DJJsO4 $U[[ 1 )	/ ..(99KG
HCJDJJLA-.

 #4(NDD^TD DzT%D	*++%%cii6
66zB
%%j%2C2CD2IJ
nnSXXZ4Jr<   c                     d }d }d }|
d   r| j                  |j                               }|
d   r| j                  |j                               }|
d   r| j                  |      }|||fS )Nr   r   r&  r   r   )grad_output_input_weight_bias_sizes_optr   r  r  
transposedr  r  output_maskbackend_grad_inputbackend_grad_weightbackend_grad_biass                 r:   meta_convolution_backwardr	  !  sy      1~)33FKKMB1~*44W\\^D1~(22>B 35FGGr<   c                   j                  d      }j                  d      }| j                  ||f      } t        j                  j	                         dk(  d        t        j                  j	                         dk(  d        t        j                  j                  d      j                  d      k(  fd       t        j                  j                  d      j                  d      k(  fd       t        j                  | j                  d      |k(  xr | j                  d      |k(  d	        | j                  | j                               S )
Nr   r&  r2   c                       yrw  r@   r@   r<   r:   r\   zmeta_addbmm.<locals>.<lambda>E  rx  r<   c                       yrz  r@   r@   r<   r:   r\   zmeta_addbmm.<locals>.<lambda>F  rx  r<   r   c                  P    d j                  d       dj                  d       S )Nz8batch1 and batch2 must have same number of batches, got r   r   r   r|  r}  s   r:   r\   zmeta_addbmm.<locals>.<lambda>I  s,    J6;;WX>JZZ_`f`k`klm`n_opr<   c            
          d j                  d       d j                  d       dj                  d       dj                  d       d	S )Nz#Incompatible matrix sizes for bmm (r   rJ   r&  r   rp   r   r  s   r:   r\   zmeta_addbmm.<locals>.<lambda>M  sL    1&++a.1A6;;q>BR S;;q>"!FKKN#316r<   c                       y)Nz.self tensor does not match matmul output shaper@   r@   r<   r:   r\   zmeta_addbmm.<locals>.<lambda>T  s    @r<   )r   r  rM   r^   r}   r   )r   r|  r}  r  r  r  r  s    ``    r:   meta_addbmmr  ?  s    ;;q>D;;q>D;;d|$D	LL"$HI	LL"$HI	LLA&++a.(p 
LLA&++a.(	
 
LL		!51!5@ >>$))+&&r<   c                 @    | j                  | j                               S r7   r  )r   rI  kwargss      r:   meta_randint_liker  Y  s    >>$))+&&r<   )
grad_scale	found_infc       	         n    | |||||fD ])  t        j                  t        t              fd       + y )Nc                       dt                S Nz'exponent must be a tensor list but got rq   ls   r:   r\   z#meta__fused_adam_.<locals>.<lambda>t      =d1gYGr<   rM   r^   rg   r   )r   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizer  r  r  s                  @r:   meta__fused_adam_r+  ^  s6    & E8[/;Oq$G	
 Pr<   c       	             | |||||fD ])  t        j                  t        t              fd       + d } ||        ||       ||       ||       ||      fS )Nc                       dt                S r  r  r  s   r:   r\   z"meta__fused_adam.<locals>.<lambda>  r  r<   c                 R    | D cg c]  }t        j                  |       c}S c c}w r7   r  )tensor_listr  s     r:   empty_like_listz)meta__fused_adam.<locals>.empty_like_list  s%    -89[  #[999s   $r  )r   r  r   r!  r"  r#  r$  r%  r&  r'  r(  r)  r*  r  r  r0  r  s                   @r:   meta__fused_adamr1  x  so    & E8[/;Oq$G	
 P: 	!$( r<   c                 j    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  u  fd       t        j                  j                  t         j                  u fd       t        j                   j                  d      j                  d      k(   fd        j                   j                  d      j                  d      ft         j                  	      S )
Nr&  c                       y)Nza must be a 2D tensorr@   r@   r<   r:   r\   zmeta__int_mm.<locals>.<lambda>      '>r<   c                       y)Nzb must be a 2D tensorr@   r@   r<   r:   r\   zmeta__int_mm.<locals>.<lambda>  r4  r<   c                  "    d j                    S )Nzexpected self to be int8, got r   )r   s   r:   r\   zmeta__int_mm.<locals>.<lambda>      0	:r<   c                  "    d j                    S )Nzexpected mat2 to be int8, got r   )r  s   r:   r\   zmeta__int_mm.<locals>.<lambda>  r7  r<   r   r   c            
          d j                  d       d j                  d       dj                  d       dj                  d       d	S )Nz'Incompatible matrix sizes for _int_mm (r   rJ   r   r   rp   r   r   r  s   r:   r\   zmeta__int_mm.<locals>.<lambda>  sH    5affQi[!&&) M66!9+Qqvvayk,r<   r   )rM   r^   r}   rU   ru  r   r   rv  r:  s   ``r:   meta__int_mmr;    s     
LLA>?	LLA>?	LL	5::: 
LL	5::: 
LL	q	QVVAY	
 ;;q	166!9-U[[;AAr<   c                 f    t        j                   j                         dk(  d        t        j                   j                  t         j                  u  fd        j                  d      } j                  d      dz  } j                  |dz  ||dz  z  d|dz  ft         j                  	      S )
Nr&  c                       yNzw must be a 2D tensorr@   r@   r<   r:   r\   z2meta__convert_weight_to_int4pack.<locals>.<lambda>  r4  r<   c                  "    d j                    S Nr3  r   r  s   r:   r\   z2meta__convert_weight_to_int4pack.<locals>.<lambda>      .qwwi8r<   r   r      r      r   )rM   r^   r}   rU   r  r   r   rv  r  inner_k_tilesr>  r  s   `   r:    meta__convert_weight_to_int4packrG    s    	LLA>?	LL	5;;8 	
q	A	q	AA;;F-"$%Q		
 kk   r<   c                 J    t        j                   j                         dk(  d        t        j                   j                  t         j                  u  fd        j                  d      } j                  d      } j                  ||dz  ft         j                        S )Nr&  c                       yr>  r@   r@   r<   r:   r\   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>  r4  r<   c                  "    d j                    S Nzexpected w to be int32, got r   rA  s   r:   r\   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>  rB  r<   r   r   r   )rM   r^   r}   rU   rv  r   r   r  rE  s   `   r:   (meta__convert_weight_to_int4pack_for_cpurL    s    	LLA>?	LL	5;;8 	
q	A	q	A;;	
AFkk   r<   c                 .    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                  t         j                  u fd        j                   j                  d      j                  d      dz   j                  	      S )
Nr&  c                       yNzx must be a 2D tensorr@   r@   r<   r:   r\   z*meta__weight_int4pack_mm.<locals>.<lambda>  r4  r<   r6  c                       y)Nzw must be a 4D tensorr@   r@   r<   r:   r\   z*meta__weight_int4pack_mm.<locals>.<lambda>  r4  r<   c                  "    d j                    S Nr2  r   rJ   s   r:   r\   z*meta__weight_int4pack_mm.<locals>.<lambda>      5aggY?r<   c                  "    d j                    S rK  r   rA  s   r:   r\   z*meta__weight_int4pack_mm.<locals>.<lambda>  rB  r<   r   rC  r   
rM   r^   r}   rU   r  r  r  rv  r   r   r4  s   ``  r:   meta__weight_int4pack_mmrW    s    	LLA>?	LLA>?	LL	EMM5==%..AA? 
LL	5;;8 ;;qvvay!&&)a-qww;??r<   c                 (    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                  t         j                  u fd        j                   j                  d      j                  d       j                        S )Nr&  c                       yrO  r@   r@   r<   r:   r\   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  r4  r<   c                       yr>  r@   r@   r<   r:   r\   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  r4  r<   c                  "    d j                    S rR  r   rS  s   r:   r\   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  rT  r<   c                  "    d j                    S r@  r   rA  s   r:   r\   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  rB  r<   r   r   )
rM   r^   r}   rU   r  r  r  r  r   r   r4  s   ``  r:    meta__weight_int4pack_mm_for_cpur]        	LLA>?	LLA>?	LL	EMM5==%..AA? 
LL	5;;8 ;;qvvay!&&)177;;;r<   c                 (    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                  t         j                  u fd        j                   j                  d      j                  d       j                        S )Nr&  c                       yrO  r@   r@   r<   r:   r\   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  r4  r<   c                       yr>  r@   r@   r<   r:   r\   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  r4  r<   c                  "    d j                    S rR  r   rS  s   r:   r\   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  rT  r<   c                  "    d j                    S rK  r   rA  s   r:   r\   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  rB  r<   r   r   rV  )rJ   r  r5  qScaleqZeross   ``   r:   )_weight_int4pack_mm_with_scales_and_zerosrf    r^  r<   r   r  c                     | |z   dz
  |z  |z  S rd  r@   r:  s     r:   kai_rounduprh    s    UQY1!!r<   c                   	
 | dk(  ry||k(  r(d}d}d}dddd fdfd} ||||||      S |dz  d	k(  rC||z  d	k(  r:d}d}d}dddd		fd
}	
fdd 
	fd	fd |||||||      S y y y )Nr6  rC  r  r&  c                 8    t        ||z  d      }t        | |      S )Nr6  rh  )r  krsrkr_sr_roundedup4s       r:   kai_k_roundedupz3get_kai_packed_weight_size.<locals>.kai_k_roundedup  s#     $/rBw#: "1&677r<   c                 X     | ||      }|dz  dk(  sJ d       ||dz  z   z   z   z  S )Nr&  r   zk_internal must be evenr@   )	r  nrrl  rm  
k_internalro  kai_num_bytes_biaskai_num_bytes_multiplier_rhskai_num_bytes_sum_rhss	        r:   9kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0z]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0  sY     -QB7
"Q1,G.GG,1_23+, )) r<   c                 >    t        | |      |z  }| ||||      z  S r7   rk  )r>  r  rq  rl  rm  num_rowsrv  s         r:   7kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0z[get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0'  s6     'q"-3 O2r2r<   rD  r   c                 |    ||z  dk(  sJ |	z  dk(  sJ |z  dk(  sJ t        | |      |z  }| |||||      z  S r  rk  )
r>  r  rq  rl  rm  blrx  kai_bl_multiple_of;kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0kai_nr_multiple_ofs
          r:   9kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0z]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0?  sp     RA~%~//A555//A555&q"-3 Q2r2rr<   c                     ||z  dk(  sJ |
z  dk(  sJ |z  dk(  sJ  	       } | |      } ||      }|||z  z   z   z  S r  r@   )r  rq  rl  rm  r{  num_bytes_multiplier_rhsnum_blocks_per_rownum_bytes_per_blockr|  #kai_get_bf16_datatype_size_in_bytesr~  kai_num_blocks_per_rowrs  kai_num_bytes_per_blockru  s           r:   r}  z_get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0O  s     RA~%~//A555//A555 ,O+P(%;Ar%B"&=0'# (+==+,() r<   c                       y)Nr&  r@   r@   r<   r:   r  zGget_kai_packed_weight_size.<locals>.kai_get_bf16_datatype_size_in_bytese  s    r<   c                 6    |z  dk(  sJ t        | |      |z  S r  rk  )r  r{  r|  s     r:   r  z:get_kai_packed_weight_size.<locals>.kai_num_blocks_per_rowh  s)    //A555"1b)R//r<   c                 (    | z  dk(  sJ | dz  |z   S )Nr   r&  r@   )r{  r  r|  s     r:   r  z;get_kai_packed_weight_size.<locals>.kai_num_bytes_per_blockl  s'    //A555a#;;;r<   r@   )n_bitsr  K	groupsizekai_nrkai_krkai_srry  r  r|  r  r}  rv  ro  r~  r  rs  rt  r  ru  s            @@@@@@@@@@@r:   get_kai_packed_weight_sizer    s    {>FFF$%!+,(!"8
 K1fff  ^q Q]a%7FFF$%!!"!"!#  ,0< M1fffi u &8 [ r<   c                 V    t        j                   j                  t         j                  u  fd       t         j                  j
                  j                         r||k(  r|j                  t         j                  k(  s2||k  re|dz  dk(  r]||z  dk(  rU|j                  t         j                  k(  r8t        d|||      } j                  t        |      t         j                        S  j                         |j                         z   } j                  |t         j                        S )Nc                  "    d j                    S r@  r   )weightss   r:   r\   z2meta__dyn_quant_pack_4bit_weight.<locals>.<lambda>{  s    .w}}o>r<   rD  r   r6  r   )rM   r^   rU   r  backendskleidiaiis_availablerQ   r  r  r   r  r   )r  scales_zerosrp  
block_sizein_featuresout_featurespacked_weight_sizes   `      r:    meta__dyn_quant_pack_4bit_weightr  u  s     
LL$> ~~++-	{	"|'9'9U[['H$R1$j(A-""enn4 8|[*
   %7!8 LL <+=+=+??/u{{CCr<   c                     t        j                   j                         dk(  d        t        j                   j                  t         j                  fv  fd        j                  d      } j                  || j                        S )Nr&  c                       y)Nzinput must be a 2D tensorr@   r@   r<   r:   r\   z-meta__dyn_quant_matmul_4bit.<locals>.<lambda>  s    )Dr<   c                  "    d j                    S )Nzexpected input to be f32, got r   )inps   r:   r\   z-meta__dyn_quant_matmul_4bit.<locals>.<lambda>  s    0<r<   r   r   )rM   r^   r}   rU   r  r   r   )r  packed_weightsr  r  r  r  s   `     r:   meta__dyn_quant_matmul_4bitr    sg     
LLa!DE	LL		emm_$< 	A==L		=::r<   c                 (    t        j                   j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                         dk(  d        t        j                  j                  t         j                  u fd        j                   j                  d      j                  d       j                        S )Nr&  c                       yrO  r@   r@   r<   r:   r\   z*meta__weight_int8pack_mm.<locals>.<lambda>  r4  r<   c                  "    d j                    S rR  r   rS  s   r:   r\   z*meta__weight_int8pack_mm.<locals>.<lambda>  rT  r<   c                       yr>  r@   r@   r<   r:   r\   z*meta__weight_int8pack_mm.<locals>.<lambda>  r4  r<   c                  "    d j                    S )Nzexpected w to be int8, got r   rA  s   r:   r\   z*meta__weight_int8pack_mm.<locals>.<lambda>  s    -aggY7r<   r   r   )
rM   r^   r}   rU   r  r  r  ru  r   r   )rJ   r  q_scaless   `` r:   meta__weight_int8pack_mmr    s    	LLA>?	LL	EMM5==%..AA? 
LLA>?	LL	5::7 ;;qvvay!&&)177;;;r<   c                 f    t        j                   j                         dk\   fd       t        j                  j                         dk\  fd       t        j                   j                  d      j                  d      k(   fd       t        j                  t	        j
                   j                        d        t        j                  t	        j
                  j                        d        t        j                  |dk\  d	        t        j                  d
v fd        j                  d      }j                  d      } j                  d d }j                  d d }t        t        j                  ||            }|j                  ||g        j                  |      S )Nr&  c                  ,    d j                          dS )Nz1cdist only supports at least 2D tensors, X1 got: r1  r   )x1s   r:   r\   z$meta_cdist_forward.<locals>.<lambda>      CBFFH:QOr<   c                  ,    d j                          dS )Nz1cdist only supports at least 2D tensors, X2 got: r1  r   )x2s   r:   r\   z$meta_cdist_forward.<locals>.<lambda>  r  r<   r   c                  P    d j                  d       dj                  d       S )Nz4X1 and X2 must have the same number of columns. X1: r   z X2: r   )r  r  s   r:   r\   z$meta_cdist_forward.<locals>.<lambda>  s*    Frwwr{mSXY[Y`Y`acYdXefr<   c                       y)Nz=cdist only supports floating-point dtypes, X1 got: {x1.dtype}r@   r@   r<   r:   r\   z$meta_cdist_forward.<locals>.<lambda>  r   r<   c                       y)Nz=cdist only supports floating-point dtypes, X2 got: {x2.dtype}r@   r@   r<   r:   r\   z$meta_cdist_forward.<locals>.<lambda>  r   r<   r   c                       y)Nz)cdist only supports non-negative p valuesr@   r@   r<   r:   r\   z$meta_cdist_forward.<locals>.<lambda>  s    !Lr<   Nr   r&  c                      d  S )Nz%possible modes: None, 1, 2, but was: r@   )compute_modes   r:   r\   z$meta_cdist_forward.<locals>.<lambda>  s    7~Fr<   r  )rM   r^   r}   r   rE   is_float_dtyperU   r   r   broadcast_shapesextendr   )	r  r  rk  r  r1r2batch_tensor1batch_tensor2r  s	   `` `     r:   meta_cdist_forwardr    sJ   	LL
AO 
LL
AO 
LL
rwwr{"f 
LLRXX&O 
LLRXX&O 
LLaLM	LL$F 
B	BHHSbMMHHSbMM..}mLMLR!<<%%r<   c                 4   |j                   d   }|j                   d   }|j                   d   }|j                   d d }|j                   d d }	t        t        j                  ||	            }
|
j	                         }|j                  ||g       t        j                  |
      }|dk(  s|dk(  s
|dk(  s|dk(  rt        j                  |      S |t        |j                         k7  r|j                  |      }t        j                  |t        j                        S )Nr   r  r   r   )r   r   rM   r  copyr  mathprod
zeros_liker  r   r   )ro  r  r  rk  cdistc1r  r  r  r  r  tensor1_expand_sizebatch_products                r:   meta_cdist_backwardr    s     
"B	"B	"BHHSbMMHHSbMM 6 6}m TU.335Bx(II23M	Qw"'R1W(:##d288n,YY*+Be.E.EFFr<   c	                     t        j                  j                  t         j                  t         j                  fv fd       t        j                  j                  t         j                  t         j                  fv fd       t        j                  t        j                   j                         fd       j                  d      }	|rt        j                  |	dk\  d        |	dz  }	 j                  |	 j                  d            }
}t        j                  |t        k(  d        t        j                  j                  dk(  fd       t        j                  j                         j                         k(  fd	       fd
d fd}t              dk7  r|j                  j                  d            }j                  j                               }|t        k(  r"j                  |	 j                  d            }nj                  d      }n | |
|      }|t        t        fv s|s!j                  j                  d            }nj                  d      }j                  |	      }j                  d   }|t        k(  rA|rt        j                  |dk\  d        |dz  }j                  | j                  d         }nj                  |j                               }|
|||fS )Nc                  "    d j                    S )Nz(expected indices to be long or int, got r   )r   s   r:   r\   z$meta_embedding_bag.<locals>.<lambda>      :7==/Jr<   c                  "    d j                    S )Nz(expected offsets to be long or int, got r   )r  s   r:   r\   z$meta_embedding_bag.<locals>.<lambda>  r  r<   c                  "    d j                    S )Nz/expected weight to be floating point type, got r   )rn  s   r:   r\   z$meta_embedding_bag.<locals>.<lambda>  s    A&,,Pr<   r   r   c                       yNz1include_last_offset: numBags should be at least 1r@   r@   r<   r:   r\   z$meta_embedding_bag.<locals>.<lambda>  s    Gr<   c                       y)Nz@embedding_bag: per_sample_weights only supported with mode='sum'r@   r@   r<   r:   r\   z$meta_embedding_bag.<locals>.<lambda>  s    Vr<   c                  $    d j                    dS )Nz1expected per_sample_weights to be 1D tensor, got r1  r2  )per_sample_weightss   r:   r\   z$meta_embedding_bag.<locals>.<lambda>  s    GHZH_H_G``abr<   c                  N    dj                          d j                          dS )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (rp   r   )r   r  s   r:   r\   z$meta_embedding_bag.<locals>.<lambda>  s/    78J8P8P8R7S T66=mmo5FaIr<   c                 D     | ||      xr |j                  d      dk(  S Nr   r   r   )r_  r  r5  padding_idxis_fast_path_index_selects       r:   is_fast_path_index_select_scalez;meta_embedding_bag.<locals>.is_fast_path_index_select_scale  s(    %c6;?XELLQROWXDX	
r<   c                     | j                   t        j                  k(  xs | j                   t        j                  k(  xr1 | j	                  d      dk(  xr |j	                  d      dk(  xr |dk  S Nr   r   )rU   rM   rQ   rO   r   )r_  r5  r  s      r:   r  z5meta_embedding_bag.<locals>.is_fast_path_index_select"  sb    YY%++%@ejj)@  

1" a A%  a		
r<   c                 2    | | |||      S  | ||      S r7   r@   )r_  r  r5  r  r  r  s       r:   is_fast_pathz(meta_embedding_bag.<locals>.is_fast_path*  s)    23v{SS,S&+FFr<   cpuc                       yr  r@   r@   r<   r:   r\   z$meta_embedding_bag.<locals>.<lambda>D  s    Or<   )rM   r^   rU   r   r  rE   r  r   r   MODE_SUMr   r   r.  MODE_MAX	MODE_MEANr   )rn  r   r  scale_grad_by_freqr  sparser  include_last_offsetr  num_bagsr5  r  
offset2bagbag_sizemax_indicesfast_path_sumnumBagsr  r  s   ```   `          @@r:   meta_embedding_bagr    s}    
LL%**eii00J 
LL%**eii00J 
LLV\\*P
 ||AHMG	
 	AhA7F%HV	
 	##q(b	
 	$$&'--/9	



G 7u$&&w||A7
$$W\\^48!++Hfkk!nEK!++A.K$V-?UIx(( **7<<?;J **1-J$$X.--"8"qLO 1!++GV\\!_EK!++HMMO<K:x44r<   c                     t        | ||g| \  }}}}t        |      dk(  r|j                  |j                               }||||fS )Nr  )r  r.  r   r   )rn  r   r  rG   r5  r  r  r  s           r:   meta_embedding_bag_forward_onlyr  M  sX    0B1#'1-FJ+ 7u$$$W\\^4:x44r<   c                     |r|S | j                   j                  s| j                   j                  r| j                   S |rt        j                  S | j                   S r7   )rU   r-  r!  rM   r   )r  rU   promote_int_to_longs      r:   _get_reduction_dtyper  W  sD    {{$$(>(>{{	zz;;r<   r   c                    t        | |d      }t        j                  | j                  |      }t	        | ||      }| j                  ||      S )NT)r  r   )r  rE   r  r   r  r   )r  r  r  rU   r  r  s         r:   meta_nansumr  d  sI     (u$OLT2D+E4AL??<|?<<r<   c           	          t        j                  | j                  t        t	        | j                                           }| j                  |      S r7   )rE   r  r   r]   r   r}   r   )r  r  s     r:   meta_medianr  m  s<    77U5-.L ??<((r<   c                    t        |       dk(  rt        j                  d       t        j                  | j                  |f      }t        | ||      }| j                  |      | j                  |t        j                        fS )Nr*  zmedian CUDA with indices outputr   )	r.  rE   alert_not_deterministicr  r   r  r   rM   r   )r  r}   r  r  s       r:   meta_median_mode_dimr  u  sp     5V#%%&GH


u{{SF
3C+E3@L%EJJ7 r<   c                     | S r7   r@   r   s    r:   meta_logical_not_r    r  r<   c                    t        j                  t        |      | j                         k\  d        t	        |      D ]"  \  t        j                  dk\  fd       $ t        |      | j                         z
  }d|z  t        | j                        z   }t        t        |            D cg c]  }||   ||   z   }}| j                  |      S c c}w )Nc                       y)NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr@   r@   r<   r:   r\   zmeta_repeat.<locals>.<lambda>  s    lr<   r   c                      d d  S )Nz"Repeats cannot be negative, found r  r@   )r  reps   r:   r\   zmeta_repeat.<locals>.<lambda>  s    8ZsKr<   r%  )	rM   r^   r   r}   r  r]   r   r   r   )r   r  num_new_dimensionspadded_sizer  target_sizer  s       ` @r:   meta_repeatr    s    	LLG
"l G$31HK	
 % W
2++eDJJ.??K8=c'l8KL8K1;q>GAJ.8KKL>>+&& Ms   1Cc                     | S r7   r@   r   s    r:   
meta_zero_r    r  r<   c                 z    t        |t        j                        r t        | j                  |j                         | S r7   )rg   rM   r   r`   r   r   r   s     r:   meta_binop_inplacer    s)     %&

EKK8Kr<   c                     d }d }d } ||       r ||      rt        d       ||       r ||      st        d      t        |t        j                        r t	        | j
                  |j
                         | S )a*  
    Some checks for inplace ops.
    Checks for promotion rules for some dtypes.
    int.add/sub_(float) and bool.add/sub_(others) are rejected.
    Promoting in these in-place operations would require reallocating
    and copying over elements, hence not allowed.
    Checks for alpha param.
    c                     t        | t              rt        j                  | j                        S t        | t
              S r7   )rg   r#   rE   ri  rU   r   rj   s    r:   is_integericz.meta_binop_inplace_alpha.<locals>.is_integeric  s.    c:&))#))44c7++r<   c                     t        | t              rt        j                  | j                        S t        | t
              S r7   )rg   r#   rE   r  rU   r   r  s    r:   
is_floaticz,meta_binop_inplace_alpha.<locals>.is_floatic  s.    c:&''		22c9--r<   c                     t        | t              rt        j                  | j                        S t        | t
              S r7   )rg   r#   rE   is_boolean_dtyperU   r   r  s    r:   is_booleanicz.meta_binop_inplace_alpha.<locals>.is_booleanic  s.    c:&))#))44c8,,r<   z]Promotion of int.add/sub_(float) in in-place ops are not possible due to element size change.z_Promotion of book.add/sub_(others) in in-place ops are not possible due to element size change.)r[  rg   rM   r   r`   r   )r   r   r  r  r  r  s         r:   meta_binop_inplace_alphar    sz    $,.- Dj/k
 	

 D,u"5m
 	
 %&

EKK8Kr<   c                 8    t        | t        j                        S NrB   rK   r   rF   )r   r  s     r:   
meta_roundr    s    <DD r<   c                 l    t        j                  t        j                  j                         fd       t        t         j                        r8t        j                  t        j                  j                         fd       y t        j                  t        t               fd       y )Nc                  &      dj                    S )Nz7: Expected input tensor to have an integral dtype. Got r   )r  r   s   r:   r\   z#shift_dtype_check.<locals>.<lambda>  s    7)RSWS]S]R^_r<   c                  &      dj                    S )Nz6: Expected shift value to have an integral dtype. Got r   r  r  s   r:   r\   z#shift_dtype_check.<locals>.<lambda>  s    wiUVYV_V_U`ar<   c                        d S )Nz): Expected shift value to be an int. Got r@   r  s   r:   r\   z#shift_dtype_check.<locals>.<lambda>  s    wiHNr<   )rM   r^   rE   ri  rU   rg   r   r   )r  r   r  s   ```r:   shift_dtype_checkr    sp    	LLtzz*_ #u||$""399-a	

 	sG$N	
r<   c                 T    t        d| |       t        | |t        j                        S )Nrshiftr  r  rK   r   rF   r  s     r:   meta_rshiftsr!    )    he,e$C$K$K r<   c                 T    t        d| |       t        | |t        j                        S )Nlshiftr  r   r  s     r:   meta_lshiftsr%    r"  r<   c                 8    | j                  | j                        S r7   r  r   s    r:   	meta_zeror'    s    >>$**%%r<   c                     | S r7   r@   r   r  s     r:   
meta_fill_r*    r  r<   c                 ,    t        j                  |       S r7   r  r)  s     r:   	meta_fillr,  !      D!!r<   c                     | S r7   r@   r   s    r:   
meta_relu_r/  &  r  r<   c                 :    t        | |t        j                        S r  r  )r   r   r  s      r:   meta__add_relur1  +  s     e$C$K$K r<   c                 ,    t        j                  |       S r7   r  r   noiselowerr  r  r;  s         r:   meta_rrelu_with_noiser6  3  s    
 D!!r<   c                 V    t        j                  |       t        j                  |      fS r7   r  r3  s         r:    meta_rrelu_with_noise_functionalr8  ;  s%     D!5#3#3E#:::r<   c                     | S r7   r@   )r   r5  r  r  r;  s        r:   meta_rrelu_with_noise_r:  B  s	     Kr<   c                 ,    t        j                  |       S r7   r  r   r   r   
accumulates       r:   meta_index_putr>  I  r-  r<   c                 F    t        | j                  |j                         | S r7   r`   r   )r   r  values      r:   meta_masked_fill_rB  N  s    DJJ

3Kr<   c                     | j                  | j                               j                  t        j                  |             }|S r   )r   r   r\  rE   r"   )r   r  r  masked_scales       r:   meta__masked_scalerE  T  s<    >>$))+.1111$7 2 L r<   c                      t        j                  |j                  t         j                  t         j                  fv d        t        j                   j                  j                  k(   fd        S )Nc                       y)NzMask must be bool or uint8r@   r@   r<   r:   r\   z&meta_masked_scatter_.<locals>.<lambda>_  s    9Ur<   c                  <    d j                    dj                    S )NzEmasked_scatter: expected self and source to have same dtypes but got r   r   )r   r  s   r:   r\   z&meta_masked_scatter_.<locals>.<lambda>c  s      **U6<<.:r<   )rM   r^   rU   r   r  )r   r  r  s   ` `r:   meta_masked_scatter_rI  \  sU    	LL

uzz5;;//1U 
LL

fll"	:
 Kr<   c                     t        | |      \  } }t        j                  | t        j                        }t	        |||      S r   )r*   rM   r   r   rI  )r   r  r  r5  s       r:   meta_masked_scatterrK  i  s;     "$-JD$d%2I2IJFf55r<   c                 $    | j                  |      S r7   r  )r   r  rr  s      r:   meta_masked_scatter_backwardrM  q  s    >>%  r<   c                     | S r7   r@   r<  s       r:   meta_index_put_rO  v  r  r<   c                 8    | j                  | j                        S r7   )viewr   r   s    r:   
meta_aliasrR  {  s    99TZZ  r<   c                   
 t        j                  | j                         dk(  d        t        j                  |j                         dk(  d        | j                         }|j                         
|d   |d   |d   }
d   }||ft        j                  
d   k(  xr 
d   k(  
fd       |r| j                  t         j
                  k(  xs | j                  t         j                  k(  xr |t         j                  k(  }t        j                  || j                  k(  xs |d        |j                        j                  |      }	n|j                        }	|sUSt        j                  j                         dk(  d	        t        j                  j                         k(  fd
       |	S )Nr2   c                       yrw  r@   r@   r<   r:   r\   z)common_meta_baddbmm_bmm.<locals>.<lambda>  rx  r<   c                       yrz  r@   r@   r<   r:   r\   z)common_meta_baddbmm_bmm.<locals>.<lambda>  rx  r<   r   r&  r   c            	      .    d d d d    d d    d	S r  r@   r  s   r:   r\   z)common_meta_baddbmm_bmm.<locals>.<lambda>  s3    RSURV
l<?*;2l1o=NbRr<   c                       y)Nzfout_dtype only supported for torch.float32 output with float16/bfloat16 inputs or same as input dtypesr@   r@   r<   r:   r\   z)common_meta_baddbmm_bmm.<locals>.<lambda>  s    |r<   c                       y)Nzself must be a 3D tensorr@   r@   r<   r:   r\   z)common_meta_baddbmm_bmm.<locals>.<lambda>  s    6Pr<   c                  0    d  dj                          S )Nz*Expected an input tensor shape with shape z but got shape: r   )r  self_baddbmms   r:   r\   z)common_meta_baddbmm_bmm.<locals>.<lambda>  s    @M]^j^o^o^q]rsr<   )
rM   r^   r}   r   rU   r  r  r  r   r\  )r|  r}  is_bmmrZ  rr  r  res_rowsres_colssupported_out_dtyper5  r  r  r  r  s      `      @@@@r:   common_meta_baddbmm_bmmr_    s   	LL"$HI	LL"$HI;;=L;;=L	aB#AAHAHx*K	LLQ2E,q/5E"E	R
 LLEMM)KV\\U^^-K)5==( 	 	%<)<|	
 !!+.11)< !!+.l.\%%'1,.PQ;.s	

 Mr<   c                     t        | |d      S )NTr_  )r   r|  s     r:   meta_bmmrb    s    "4t44r<   c                      t        | |d|      S )NT)rr  ra  )r   r|  rr  s      r:   meta_bmm_dtyperd    s    "4tyIIr<   c                 h    | |z  }| |z  }|dk7  r"t        |dk        t        |dk        k7  r|dz  }|S r  )r   )rJ   yqr  s       r:   div_rtnrh    sB    	QA	AA 	Av4A;$q1u+-	QHr<   c                     t        | |z   |z   ||dz
  z  z
  dz
  |r|dz
  ndz   |      dz   }|r|dz
  |z  | |z   k\  r|dz  }|S r  )rh  )	inputSize
kernelSizer  r	  r   r  r+  
outputSizes           r:   pooling_output_shape_pad_lrrm    s     	 *q.)* 	
 'vzA/ 	
 		  Nf$	E(99!OJr<   c           	          t        j                  |dk7  d        t        j                  dk\  fd       t        j                  dz
  z  dz   dz  k  fd       t        | ||      S )Nr   c                       y)Nzstride should not be zeror@   r@   r<   r:   r\   z&pooling_output_shape.<locals>.<lambda>  s    &Ar<   c                      d  S )Nz'pad must be non-negative, but got pad: r@   pads   r:   r\   z&pooling_output_shape.<locals>.<lambda>  s    %LSE#Rr<   r   r&  c                      d d d  S )NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r@   )r  rk  rr  s   r:   r\   z&pooling_output_shape.<locals>.<lambda>  s"    OPSu U%,nXJ@r<   )rM   r^   rm  )rj  rk  rr  r   r  r+  s    `` ` r:   rW  rW    ss    	LL1AB	LLRS	LLa8+a/A55	
 ':sC9 r<   c           	      >   	
  j                         }	t        j                  dkD  xr dkD  d        t        j                  |dkD  xr |dkD  d        t        j                  |dkD  xr |dkD  d         j                  d      dk7  xr  j                  d      dk7  }|t        j                  k(  r5t        j                  |dk(  xr |xr  j                  d      dk7  d	        nWt        j                  |dk(  xr  j                  d      dk7  xr |xs |dk(  xr |xr  j                  d      dk7   fd
       t        j                  dz  k\  xr dz  k\  fd       t        j                  dk\  xr dk\  
	fd       y )Nr   c                       y)NzCkernel size should be greater than zero, but got kH: {kH}, kW: {kW}r@   r@   r<   r:   r\   z$pool2d_shape_check.<locals>.<lambda>  r  r<   c                       y)Nz>stride should be greater than zero, but got dH: {dH}, dW: {dW}r@   r@   r<   r:   r\   z$pool2d_shape_check.<locals>.<lambda>  s    Pr<   c                       y)Nz\dilation should be greater than zero, but got dilationH: {dilationH}, dilationW: {dilationW}r@   r@   r<   r:   r\   z$pool2d_shape_check.<locals>.<lambda>  s    nr<   r   r&  r6  r2   c                       y)NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: {input.size()}r@   r@   r<   r:   r\   z$pool2d_shape_check.<locals>.<lambda>  s     Qr<   c                  *    d j                          S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: r   r  s   r:   r\   z$pool2d_shape_check.<locals>.<lambda>  s    opupzpzp|o}~r<   c                       d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r@   )r[  r\  r_  r`  s   r:   r\   z$pool2d_shape_check.<locals>.<lambda>  s$     ygbT>r<   c                  .    d d  d d d d dS NzGiven input size: (rJ   z). Calculated output size: (z). Output size is too smallr@   )ra  rb  r,  rg  r-  r.  s   r:   r\   z$pool2d_shape_check.<locals>.<lambda>  s8    %k]!K=* N$$0><.+ O##r<   )r}   rM   r^   r   r  )r  r[  r\  r]  r^  r_  r`  	dilationH	dilationWr,  ra  rb  r-  r.  r   r   
valid_dimsrg  s   ```  ``  `````   @r:   rX  rX    s   " 99;DL	LL
Q26U 
LL
Q26P 
LLA')a-n
 A!#:

1(:J+++AI;*;A!);Q	
 	QY<5::a=A-<* A	?j?UZZ]a-?~	
 
LL
a4+B!GtO	> 
LLq.\Q.	# 	#r<   r  r  r[  r\  r  r]  r^  pTpHpW	dilationTr}  r~  r  r  r  r  r  r  r  c           
      J   	
  j                   }t        j                  dkD  xr dkD  xr dkD  fd       t        j                  dkD  xr dkD  xr dkD  fd       t        j                  dkD  xr dkD  xr dkD  fd       t        j                  |dv  fd       t        |      D ]:  |dk(  rdk(  rt        j                   j	                        dkD   fd       < |r/t        j                  k\  xr k\  xr k\  fd	       t        j                  d
z  k\  xr d
z  
k\  xr d
z  	k\  	
fd       t        j                  dk\  xr dk\  xr dk\  fd       y )Nr   c                      d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: z, kW: r@   )r[  r  r\  s   r:   r\   z$pool3d_shape_check.<locals>.<lambda>A  s    $fRDrd,r<   c                      d d  d S )Nz0stride should be greater than zero, but got dT: z, dH: z, dW: r@   )r]  r  r^  s   r:   r\   z$pool3d_shape_check.<locals>.<lambda>H  s    >rd&FSURVWr<   c                      d d  d S )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: z, dilationW: r@   )r}  r  r~  s   r:   r\   z$pool3d_shape_check.<locals>.<lambda>N  s    #M)M)Vr<   r|  c                  &      dj                    S )Nz/: Expected 4D or 5D tensor for input, but got: r   )r  r  s   r:   r\   z$pool3d_shape_check.<locals>.<lambda>V  s    7)J5;;-Xr<   rR  c                  L      dj                    dj                         dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)r   r   )r  r  r  s   r:   r\   z$pool3d_shape_check.<locals>.<lambda>_  s.    ) --2[[M+EJJqM?:KMr<   c                  .    d d  d d d d dS )Nzinput image (T: rZ  r?  z ) smaller than kernel size (kT:  kH:  kW: rp   r@   )r  r  r  r[  r  r\  s   r:   r\   z$pool3d_shape_check.<locals>.<lambda>i  s4    "5'gYd6( C$$&4uRDbT<r<   r&  c                  ,    d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: r  r  z padT: z padW: z padH: r@   )r[  r  r\  r  r  r  s   r:   r\   z$pool3d_shape_check.<locals>.<lambda>q  s1    $eB4uRDt72$gbTKr<   r   c                  :    d d d  d d d d d dS r|  r@   )r  r  r  r  r  r  r  s   r:   r\   z$pool3d_shape_check.<locals>.<lambda>y  sD    !'!E7!G9AfX F((/y%'!F8 L'(r<   )r   rM   r^   r   r   )r  r  r  r[  r\  r  r]  r^  r  r  r  r  r}  r~  r  r  r  r  r  r  r  r  r   r  s   `````````````````````  @r:   r  r  %  s   0 ::D	LL
Q$26$b1f	
 
LL
Q$26$b1f	
 
LLA9)a-9IM	
 
LLX
 4[19aJJqMA	
	  RK:GrM:fl 	
 
LL
Q"6a26"q&B,	
 	
 
LL
3v{3w!|	
 	
r<   c                 j   | j                   }t        | |||||||	|
||||||||||||       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       y )Nr6  r2   r&  r   r   r  r<  )r  r*  r   r  r  r[  r\  r  r]  r^  r  r  r  r  r}  r~  r  r  r  r  r  r  r  r   s                           r:   max_pool3d_backward_shape_checkr    s    2 ::D








+0 ;dQh8;dQh6;dQh8;dQh77D$(G47D$(E27D$(G47D$(F3r<   c                     | j                   }t        | ||||||||	|
|ddd|||||||d       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       y )Nr   Tr6  r2   r&  r  )r  r*  r  r  r[  r\  r  r]  r^  r  r  r  r  r  r  r  r  r  r  r   s                       r:   r  r    s    * ::D








			-2 ;dQh8;dQh6;dQh8;dQh7r<   c                    d } |d|      \  }}t        j                  t        |      dv d        t        |      dk(  r||}
}	n |d|      \  }	}
 |d|      \  }} |d|      \  }}| j                  d	      }| j                  d
      }| j                  d      }t	        j
                  |       }|t         j                  k(  r)t        j                  | j                         dk(  d        nR|t         j                  k(  r(t        j                  | j                         dv d        nt        j                  dd        t        ||||	||      }t        ||||
||      }t        | |||	|
||||||||||       |||fS )Nc                      t        j                  t        |      dv  fd       |d   }t        |      dk(  r|n|d   }||fS )Nr?  c                      d  dS )Nzmax_pool2d: rA  r@   rB  s   r:   r\   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>  rC  r<   r   r   rD  rE  s   `   r:   rH  z3max_pool2d_checks_and_compute_shape.<locals>.unpack  rI  r<   r  rJ  c                       y)NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr@   r@   r<   r:   r\   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>  rM  r<   r   r   r  r  rS  r  r   r6  c                       y)NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr@   r@   r<   r:   r\   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>  s    cr<   r  c                       y)Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr@   r@   r<   r:   r\   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>!      Or<   Fc                       y)Nz?Unsupport memory format. Supports only ChannelsLast, Contiguousr@   r@   r<   r:   r\   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>&  s    Ur<   )rM   r^   r   r   rE   r"   r  r}   r   rW  rX  )r  r  r   r  r  r+  rH  r[  r\  r]  r^  r_  r`  r}  r~  r,  ra  rb  r   r-  r.  s                        r:   r)  r)    s    M;/FB	LLFy a 6{aRB&)B	7+JD$!*h7Iy**R.K**R.KBJ//6M+++IIK1c	
 
%11	1IIK6!O	

 	U	

 (Rr9iXL&z2tRIVK



$ k11r<   c                 |    t        |||||      \  }t        j                  j                   j                  k(   fd       |j                  fd}	 |	         |	|       t        j                        }
t        j                  j                  j                  j                  |
      S )Nc                  <    dj                    d j                    S )NzExpected dtype z  for `gradOutput` but got dtype r   r  s   r:   r\   z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>V  s    /$**-MkN_N_M`ar<   c                 l    t        | dz
         t        | dz
         t        | dz
         y )Nr2   r&  r   )r<  )r  rg  r   r-  r.  s    r:   _check_dim_sizez>meta_max_pool2d_with_indices_backward.<locals>._check_dim_size\  s9    q$q,7q$q,7q$q+6r<   r  )
r)  rM   r^   rU   r   rE   r"   r   r   rz   )r*  r   r  r   r  r  r+  r   r,  r  r   rg  r   r-  r.  s   ``         @@@@r:   %meta_max_pool2d_with_indices_backwardr  A  s     	,k67Hi		
 
LL

k'''a
 L99D7
 K G//5M;;

jj{{#	 r<   c                    t        | |||||      \  }}}| j                         dk(  r| j                  d      nd}	t        j                  |       }
| j                         dk(  r|||g}n|	|||g}t        j                  || j                  | j                  |
      t        j                  |t
        j                  | j                  |
      fS r'  )
r)  r}   r   rE   r"   rM   r   rU   rz   r   r*  s               r:   meta_max_pool2d_with_indicesr  m  s     	,{FGXy		
  %yy{a/UZZ^QF//6Myy{a\;7\;?++<<'		
 	++<<'		
 r<   c           	         
 t        j                   j                  dv  fd        j                  }t        |dz
  |      D ]?  
t        j                   j	                  
      dkD  d j	                          d
 d       A t        j                  t              dk(  d	        t        j                  t        |      dk(  d
         j	                  d      } j	                  d       j	                  d      |dk(  r j	                  d      }nd}t        j                   j                  j                  k(  d        t        j                  j                  dk(  fd       j	                  d      }j	                  d      }j	                  d      
t        j                  ||k\  d       t        j                  ||k(  d        t        j                  
dk(  
fd       t        j                  |d   d   z   dz
  k  fd       t        j                  |d   d   z   dz
  k  fd        j                         dk(  r|||d   |d   g}	n||d   |d   g}	t        j                  |	 j                   j                        t        j                  |	t         j                   j                        fS )Nr  c                  "    d j                    S )Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: r2  r   s   r:   r\   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    LTYYKXr<   r2   r   z^fractional_max_pool2d: Expected input to have non-zero  size for non-batch dimenions, but got r  z emptyr&  c                       y)NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr@   r@   r<   r:   r\   z,meta_fractional_max_pool2d.<locals>.<lambda>       2r<   c                       y)NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr@   r@   r<   r:   r\   z,meta_fractional_max_pool2d.<locals>.<lambda>  r  r<   rS  r  r   r6  r   c                       y)Nz6Expect _random_samples to have the same dtype as inputr@   r@   r<   r:   r\   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    Hr<   c                  "    d j                    S )Nz1Expect _random samples to have 3 dimensions got, r2  )random_sampless   r:   r\   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    CNDWDWCXYr<   z=Expect _random_samples.size(0) no less then input batch size.c                       y)Nz<Expect _random_samples.size(1) equals to input channel size.r@   r@   r<   r:   r\   z,meta_fractional_max_pool2d.<locals>.<lambda>      Nr<   c                      d  dS )Nz/Expect _random_samples.size(2) equals to 2 got .r@   )r  s   r:   r\   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    #RSTRUUV!Wr<   c                      dd    d  S )Nz%fractional_max_pool2d: kernel height r   z' is too large relative to input height r@   )input_heightr  s   r:   r\   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    7A7GGno{n|}r<   c                      dd    d  S )Nz$fractional_max_pool2d: kernel width r   z& is too large relative to input width r@   )input_widthr  s   r:   r\   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    6{1~6FFlmxlyzr<   r  )rM   r^   r   r   r   r   rU   r}   r   rz   r   )r   r  r  r  r   input_channelsinput_batchr>  cr   r  r  r  s   `` `      @@@r:   meta_fractional_max_pool2dr    s   	LL		VX 99D4!8T"IIaL166:iik]BRSTRUU[]	
 # 
LLKA	2
 
LLKA	2 YYr]N99R=L))B-Kqyiil	LL

n***H 
LLq Y
 	AAAAAA	LL	[G 
LL	^N 
LLaWX	LLAQ'!+|;} 
LLAQ'!+{:z
 xxzQ^[^[^LAA? 	**;;	

 	++;;	
 r<   c                 |   t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  | xs t        |      dv d        |s|n|d   }	|s|nt        |      dk(  r|	n|d   }
|s|nt        |      dk(  r|	n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  | j                  d	v d
        | j                  dk(  r| j	                  d      nd}| j	                  d      }| j	                  d      }| j	                  d      }| j	                  d      }t        ||||	||      }t        ||||
||      }t        ||||||      }t        | |||||	|
|||||||||||||d       | j                  dk(  xr& t        j                  |       t         j                  k(  }| j                  dk(  rK| j                  d      }|j                          xr  |j                  t         j                        }||||f}n|||||f}| j                  |      }| j                  |t         j                        }|r@|j                  t         j                        }|j                  t         j                        }||fS )Nrr  c                       yNzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr@   r@   r<   r:   r\   z.meta_max_pool3d_with_indices.<locals>.<lambda>      _r<   r   r   r&  c                       yNzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr@   r@   r<   r:   r\   z.meta_max_pool3d_with_indices.<locals>.<lambda>      cr<   c                       yNzImax_pool3d: padding must either be a single int, or a tuple of three intsr@   r@   r<   r:   r\   z.meta_max_pool3d_with_indices.<locals>.<lambda>      [r<   c                       yNzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr@   r@   r<   r:   r\   z.meta_max_pool3d_with_indices.<locals>.<lambda>  r  r<   r|  c                       yr~  r@   r@   r<   r:   r\   z.meta_max_pool3d_with_indices.<locals>.<lambda>  r  r<   rR  r(  rS  r  r   zmax_pool3d_with_indices()r6  r   r   )rM   r^   r   r   r   rW  r  rE   r"   r  r   r   r   r   r\  )r  r  r   r  r  r+  r  r[  r\  r  r]  r^  r  r  r  r  r}  r~  r  r  r  r  r  r  r  r  r  input_channels_last_checkr   r  r   s                                  r:   meta_max_pool3d_with_indicesr    sJ    
LLKF"_ 
QB;1$+a.B;1$+a.B	LL
+c&kV+c vayBc&kQ&6F1IBc&kQ&6F1IB	LLG[ 
B7|q gajB7|q gajB	LLH\ I ]a/	Xa[I ]a/	Xa[I	LL

fK
  %zzQUZZ^AFjjnGJJrNEjjnGZZ^F BIyIE"7BB	9MG!&"b"iKF








#+2 	

aXE77>%BXBXX  zzQ$)OOA$6!)7799
'5500 6 
 	
 eWf5	WeWf=	
//)
$Cooiu{{o;Gff5#9#9f:**5+A+A*B<r<   c                    t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }	t        |      dk(  r|n|d   }
t        j                  | xs t        |      dv d        |s|n|d   }|s|	nt        |      dk(  r|n|d   }|s|
nt        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  |j                  d	v d
        |j	                  d      }|j	                  d      }|j	                  d      }|j	                  d      }| j	                  d      }| j	                  d      }| j	                  d      }t        || ||||	|
|||||||||||||||d       |j                  dk(  xr& t        j                  |      t         j                  k(  }|j                  dk(  rD|j                  d      }|j                          xr  |j                  t         j                        }|j                  |j                        }|r |j                  t         j                        }|S )Nrr  c                       yr  r@   r@   r<   r:   r\   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>`  r  r<   r   r   r&  c                       yr  r@   r@   r<   r:   r\   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>h  r  r<   c                       yr  r@   r@   r<   r:   r\   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>p  r  r<   c                       yr  r@   r@   r<   r:   r\   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>x  r  r<   r|  c                       yr~  r@   r@   r<   r:   r\   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  r  r<   r(  rS  r  r   z"max_pool3d_with_indices_backward()rR  r6  r   )rM   r^   r   r   r   r  rE   r"   r  r   r   r   r   r\  )r*  r  r  r   r  r  r+  r   r  r[  r\  r  r]  r^  r  r  r  r  r}  r~  r  r  r  r  r  r  r  r  r  r.  s                                 r:   %meta_max_pool3d_with_indices_backwardr  R  s    
LLKF"_ 
QB;1$+a.B;1$+a.B	LL
+c&kV+c vayBc&kQ&6F1IBc&kQ&6F1IB	LLG[ 
B7|q gajB7|q gajB	LLH\ I ]a/	Xa[I ]a/	Xa[I	LL

fK
 jjnGJJrNEjjnGZZ^FR Er"Gb!F#








,/6 	

aXE77>%BXBXX  zzQ$)OOA$6!)7799
'5500 6 
 	 -J]]1G1G]H
r<   gridc                 z    t        j                   j                  j                  k(   fd       t        j                   j                  t         j                  k(  xr j                  t         j                  k(   fd       t        j                   j
                  d   j
                  d   k(   fd       t        j                  j
                  d    j                  dz
  k(   fd       t        d j                        D ],  t        j                   j
                     dkD   fd       . y )	Nc                  <    dj                    d j                    S )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on r  r  r  s   r:   r\   z+check_grid_sampler_common.<locals>.<lambda>  s"    \\N"24;;-Ar<   c                  <    dj                    d j                    S )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )ry   r  s   r:   r\   z+check_grid_sampler_common.<locals>.<lambda>  s!    nT[[MCr<   r   c                  <    dj                    d j                    S )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes r   r  s   r:   r\   z+check_grid_sampler_common.<locals>.<lambda>  s"      %},A$**Or<   r   r&  c                  B    dj                   dz
   d j                   S )Nz+grid_sampler(): expected grid to have size r&  z, in last dimension, but got grid with sizes )r   r   r  s   r:   r\   z+check_grid_sampler_common.<locals>.<lambda>  s'    9%**q.9I J226**?r<   c                  *    dj                    d  dS )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r  r  r   r  s   r:   r\   z+check_grid_sampler_common.<locals>.<lambda>  r  r<   )rM   r^   rz   ry   r  r   r   r   )r  r  r  s   ``@r:   check_grid_sampler_commonr    s    	LL#	
 
LL%F$++*F	
 
LLA$**Q-'	
 
LL

2%**q.(	
 1ejj!KKNQ	
 "r<   c                       e Zd ZdZdZdZy)GridSamplerInterpolationr   r   r&  N)rr   
__module____qualname__BILINEARNEARESTBICUBICr@   r<   r:   r  r    s    HGGr<   r  interpolation_modec                     t        j                   j                  dk(  xr  j                  j                  k(   fd       t        j                   j                  dk(  xr |t        j                  j
                  k(   d        y )NrR  c                  <    dj                    d j                    S )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r  r   r  s   r:   r\   z'check_grid_sampler_3d.<locals>.<lambda>  s!    449KK=#DJJ<1r<   c                       y)Nz<grid_sampler(): bicubic interpolation only supports 4D inputr@   r@   r<   r:   r\   z'check_grid_sampler_3d.<locals>.<lambda>  r  r<   )rM   r^   r   r  r  rA  )r  r  r  s   `` r:   check_grid_sampler_3dr    sp    	LL

a3EJJ$))3	
 
LLJJ!O M"&>&F&F&L&LL	
 	Or<   c                     |d   }|r&t        j                  |t         j                        }nd }t        j                  |t         j                        }	||	fS Nr   r   )rM   r  r   r   
r*  r  r  r  padding_modealign_cornersr  input_requires_gradr.  	grad_grids
             r:   grid_sampler_2d_backward_metar    sQ     &a.%%e5;R;RS

  U5L5LMI	""r<   c                     t        | |       t        | ||       | j                  d   }| j                  d   }|j                  d   }|j                  d   }|j                  d   }	| j                  |||||	f      S )Nr   r   r&  r2   )r  r  r   r   )
r  r  r  r  r  r  Cout_Dout_Hout_Ws
             r:   grid_sampler_3dr    sv     eT*%'9:AAAAJJqMEJJqMEJJqME??Aq%677r<   r  c                     t        ||       t        |||       |d   }|r&t        j                  |t        j                        }nd }t        j
                  |t        j                        }	||	fS r  )r  r  rM   r  r:  r   r  s
             r:   grid_sampler_3d_backwardr    sm     eT*%'9:%a.%%!?!?

 
  U5S5STIy  r<   c                     |j                  dd       }|st        j                  |      }||d<   t        j                  | g|i |S )NrU   )rT   rE   	get_dtyperM   r   )r   r  rG   r  rU   s        r:   fullr  7  sE    JJw%E
+F7O;;t-d-f--r<   c                 N   |t         j                  k(  rt        j                  |d u d        t        j                  d|| j                  n|||| j
                  n||      }| j                  r>|j                  | j                         | j                         | j                                n/|j                  | j                         | j                         d       |j                  d       |S t        j                  j                  | |||||      }|j!                  d       |S )Nc                       y)Nz9memory format option is only supported by strided tensorsr@   r@   r<   r:   r\   zzeros_like.<locals>.<lambda>M  r  r<   r   r@  Tr  )rM   
sparse_coor^   r   rU   rz   	is_sparsesparse_resize_and_clear_r   
sparse_dim	dense_dimr}   _coalesced_r/   r   r^  fill_)r   rU   ry   rz   r{   r   r  s          r:   r  r  A  s     !!!T!O	

 kk %$**5"(.4;;f!
 >>((		T__.0@ ((dhhj!D

//
!
!# " C IIaLJr<   rx   c                    |t        j                         }|t        j                         }|t         j                  }t        j                  | ||||      S rB  rM   r   get_default_devicer  r   r   rU   ry   rz   r{   r|   s         r:   	meta_onesr  n  T     }'')~))+~;;E&J r<   c                    |t        j                         }|t        j                         }|t         j                  }t        j                  | ||||      S rB  r   r  s         r:   
meta_zerosr    r  r<   c                     ddl m}  j                         }t        j                  |dk7  d        dk\  rn|z    j                        }t        j                   | |kD        xs  ||k\          fd       dk\  rn|z   t         j                               }t         j                               } j                         |   z  z   }|= |=  j                  |||      S )Nr   r  c                       y)Nz-select() cannot be applied to a 0-dim tensor.r@   r@   r<   r:   r\   zmeta_select.<locals>.<lambda>  s    ?r<   c                  6    d dj                          d  S )Nzselect(): index z! out of range for tensor of size z at dimension r   r}   r   r   s   r:   r\   zmeta_select.<locals>.<lambda>  s#    "5')J99;-~cU,r<   )
r   r  r}   rM   r   r   r   r   r  r   )	r   r}   r   r  r   r   new_sizer   new_storage_offsets	   ```      r:   meta_selectr    s    J88:D		?
 #sTzC99S>D	 %$/V3GQU3V	
	,	 aZEUT\EDIIK Hdkkm$J,,.C1HH3??8Z1CDDr<   c                 ,    t        j                  |       S r7   rE   clone_preserve_strides)r   r_  r}   r   s       r:   meta_select_scatterr        ''--r<   c                 ,    t        j                  |       S r7   r  )r   r_  r}   rt   rs   steps         r:   meta_slice_scatterr    r  r<   dim_post_exprwrap_scalarc                 v    |dk  r|sJ d}| }|dz
  }| |k  s| |kD  rJ d|  d| d| d       | dk  r| |z  } | S )Nr   r   zdim z out of bounds (ro   rp   r@   )r}   r  r  r/  r   s        r:   r   r     sm    {.C
!
Cc	S3YR4u4DSEC5PQ)RR'
Qw}Jr<   c                 J    | j                         dk(  rdS | j                  |   S r  r:  )r  r}   s     r:   ensure_nonempty_sizer    s!    11.!''#,.r<   c                 :    t         j                         d      }t        j                         d      }t        j                  ||k(  d        t	        |      D ];  k7  s	t        j                  t              t               k   fd       = y )Nr   c                       y)NzDIndex tensor must have the same number of dimensions as input tensorr@   r@   r<   r:   r\   z$gather_shape_check.<locals>.<lambda>  s    Vr<   c                  N    d dj                    dj                    d  z   S )Nz!Size does not match at dimension z expected index  to be no larger than self  apart from dimension r   )r}   r  r   r   s   r:   r\   z$gather_shape_check.<locals>.<lambda>  s5    ;A3>Nu{{m\/

|;QRUQVWXr<   )r   r}   rM   r^   r   r  )r   r}   r   	self_dims
index_dimsr  s   ```  @r:   gather_shape_checkr"    s    DHHJ"IUYY[!$J	LLZV 98LL$UA.2FtQ2OOX r<   c                 p   ddl m} t        || j                               } |j	                         dk(        }|s`t        j                  j                  t
        j                  k(  xs j                  t
        j                  k(  fd       t        | |       | j                  j                        S )Nr   r  c                  "    d j                    S )Nz8gather(): Expected dtype int32/int64 for index, but got r   r   s   r:   r\   zmeta_gather.<locals>.<lambda>  s    Nu{{m\r<   )r   r  r   r}   r   rM   r^   rU   r   r  r"  r   r   )r   r}   r   sparse_gradr  wrapped_dimis_index_emptys     `    r:   meta_gatherr(    s    J dhhj1K)%++-1*<=NKK5::%A		)A\	
 	4e4>>%++&&r<   c                     |r6| dk(  ry| dk(  ry| dk(  ry| dk(  ry| d	k(  ry
t        j                  dd        y | dk(  ry| dk(  ryt        j                  dd        y )Nr  
REDUCE_ADDr  REDUCE_MULTIPLYmeanREDUCE_MEANamaxREDUCE_MAXIMUMaminREDUCE_MINIMUMFc                       y)Nz=reduce argument must be either sum, prod, mean, amax or amin.r@   r@   r<   r:   r\   z#get_operator_enum.<locals>.<lambda>  s    Sr<   addmultiplyc                       y)Nz/reduce argument must be either add or multiply.r@   r@   r<   r:   r\   z#get_operator_enum.<locals>.<lambda>  s    $Ur<   r  )reduce_use_new_optionss     r:   get_operator_enumr8    s{    e$ ##S	
 	e
"$UUVr<   c                 P    ddl m}  ||j                         dk7        rSt        j                  |j
                  t        j                  k(  xs |j
                  t        j                  k(   fd       |1t        j                  |j
                  |j
                  k(   fd       y y )Nr   r  c                        dS )Nz((): Expected dtype int32/int64 for indexr@   method_names   r:   r\   z,scatter_gather_dtype_check.<locals>.<lambda>  s    {m#KLr<   c                        dS )Nz0(): Expected self.dtype to be equal to src.dtyper@   r;  s   r:   r\   z,scatter_gather_dtype_check.<locals>.<lambda>  s    {m#STr<   )r   r  r   rM   r^   rU   r   r  )r<  r   r   src_optr  s   `    r:   scatter_gather_dtype_checkr?    sv    JEKKMQ./KK5::%A		)AL	

 JJ'--'T	
 r<   c                     t        | d      S rd  )r   r   s    r:   ensure_nonempty_dimrA  "  s    sA;r<   c                     ddl m}  |j                         dk(        ry t        j                  t         j                               t        j                               k(  d        d}t         j                               }t        |      D ]'  }t        |      }|k(  r|t         |      kD  s%d} n |s1/t        |      D ]!  }t        |      }|t        |      kD  sd} n ft        j                  t         j                               t        j                               k(  d        t        j                  |  fd       y t        j                  |  fd       y )	Nr   r  c                       yNzCIndex tensor must have the same number of dimensions as self tensorr@   r@   r<   r:   r\   z%scatter_shape_check.<locals>.<lambda>.  r  r<   FTc                       yrD  r@   r@   r<   r:   r\   z%scatter_shape_check.<locals>.<lambda>H  s    Yr<   c                  b    dj                    dj                    d  dj                    z   S )NExpected index r  r  z and to be no larger than src r   )r}   r   r   r>  s   r:   r\   z%scatter_shape_check.<locals>.<lambda>L  s6    oekk]2Mdjj\Z&se+I'--YZr<   c                  H    dj                    dj                    d  z   S )NrG  r  r  r   r
  s   r:   r\   z%scatter_shape_check.<locals>.<lambda>R  s*    oekk]2Mdjj\Z&se,-r<   )	r   r  r   rM   r^   rA  r}   r   r  )	r   r}   r   r>  r  is_wrong_shaper   r  index_d_sizes	   ````     r:   scatter_shape_checkrK  '  sF   JEKKMQ./	LLDHHJ'+>uyy{+KKU
 N#DHHJ/I 9+E158.tQ77!N  g1y!A/q9L27A>>!%	 " 
+/B599;/OOY	
 	Z	
 	-	
r<   c                     t        || j                               }t        d| ||       t        | |||       |t	        ||       y y )Nscatter)r   r}   r?  rK  r8  )r   r}   r   r_  r6  r7  r&  s          r:   scatter_meta_implrN  X  sE     dhhj1Ky$s;k5#6'?3 r<   c                 V    t        | |||d       | j                  | j                        S Nr3  rN  r   r   r   r}   r   r_  s       r:   meta_scatter_addrS  a  s%    dCU3>>$**%%r<   c                 $    t        | |||d       | S rP  rN  rR  s       r:   meta_scatter_add_rV  g  s    dCU3Kr<   c                     t        |t        j                        r|nd }t        | ||||       | j	                  | j
                        S r7   )rg   rM   r   rN  r   r   r   r}   r   src_or_valuer   r_  s         r:   meta_scatterrZ  m  s;     %\5<<@,dCdCV4>>$**%%r<   c                 `    t        |t        j                        r|nd }t        | ||||       | S r7   )rg   rM   r   rN  rX  s         r:   meta_scatter_r\  |  s-     %\5<<@,dCdCV4Kr<   queryr   rA  	dropout_p	is_causalreturn_debug_maskr  c           	      V   | j                  d      }| j                  d      }| j                  d      }	| j                  d      }
|j                  d      }| j                  dd      }t        j                  |      j                  dd      }t        j                  |||	ft        j
                  | j                        }|ra|
dkD  rdnd}t        j                  |	|z        }|dk  rd}n|dk  rd}t        j                  |||	|f| j                  | j                        }n,t        j                  d| j                  | j                        }t        j                  j                  rkt        j                  j                         rMt        j                  d	t        j                  d
      }t        j                  d	t        j                  d
      }nLt        j                  dt        j                  d
      }t        j                  d	t        j                  d
      }||d d |	||||f	S )Nr   r   r&  r2   r  @         r@   rw   )r   r+  rM   r   r   rQ   rz   r  ceilrU   versionhipr*  r  r   rV  )r]  r   rA  r^  r_  r`  r  r  	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_kquery_t	attention	logsumexpblocksize_cmax_seqlen_k
debug_maskseedoffsets                       r:   (meta__scaled_dot_product_flash_attentionrt    s    AJ

1IAzz!}H!ooa#G  )33Aq9I	Y 23kk||I %]cyy!3k!AB$L3&L[[$6E++<<

 [[%++ellK
 }}UZZ446{{2UZZ?Ruzz&A{{Aell6BRu||FC 	
 
r<   	res_shape.c                     t         j                        |k(  r9 j                  dd      }t        j                  |      j                  dd      }|S t        g d fdd      }|D cg c]  }||   	 }}t        t        |            D cg c]  }|j                  |       }}t        j                  | j                   j                        j                  |      }|S c c}w c c}w )Nr   r&  )r   r   r&  r2   c                 *    j                         |    S r7   r  )idxr]  s    r:   r\   z,alloc_with_matching_layout.<locals>.<lambda>  s    %,,.*=r<   Tr   r  )r]   r   r+  rM   r   sortedr   r   r   r   rU   rz   r   )	r]  ru  rl  r  	dim_orderrx  permuted_shaper  final_permutes	   `        r:   alloc_with_matching_layoutr}    s     U[[Y&//!Q'w'11!Q7 J =t
	 5>>IS)C.I>5:3y>5JK5J+5JKkk%++ell

'-
  	 J ?Ks   *C%C*	attn_biascompute_log_sumexpc	           	         | j                  d      }	| j                  d      }
| j                  d      }|j                  d      }|j                  d      }|	|
||f}t        | |      }t        j                  |	|
|ft        j                  | j
                        }t        j                  dt        j                  d      }t        j                  dt        j                  d      }||d d ||||d f	S Nr   r   r&  r   r  r@   rw   r   r}  rM   r   rQ   rz   r   )r]  r   rA  r~  r  r^  r_  r`  r  r  rF  S_QS_KVD_Vru  r  
logsum_exprr  rs  s                      r:   (meta__scaled_dot_product_cudnn_attentionr    s     	

1A

1A
**Q-C88A;D
**R.CAsC I
$UI
6C	
Askk||J ;;rF;D[[5::f=F 	
 
r<   c           	         | j                  d      }| j                  d      }	| j                  d      }
|j                  d      }|j                  d      }||	|
|f}t        | |      }t        j                  ||	|
ft        j                  | j
                        }t        j                  dt        j                  d      }t        j                  dt        j                  d      }||d d |
|||d f	S r  r  )r]  r   rA  r~  r^  r_  r`  r  r  H_Qr  r  r  ru  r  r  rr  rs  s                     r:   5meta__scaled_dot_product_fused_attention_overrideabler    s     	

1A
**Q-C
**Q-C88A;D
**R.CCc"I
$UI
6C	
Ckk||J ;;rF;D[[5::f=F 	
 
r<   r  rn  	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetc                 J   t        j                  |j                  dd            j                  dd      }t        j                  |j                  dd            j                  dd      }t        j                  |j                  dd            j                  dd      }|||fS r  )rM   r   r+  )r  r]  r   rA  r  rn  r  r  r  r  r^  r_  r  r  r  grad_qgrad_kgrad_vs                     r:   'meta__scaled_dot_product_flash_backwardr  6  s    , eooa34>>q!DFcmmAq12<<QBFeooa34>>q!DF66!!r<   	attn_maskc                     | j                  d      }| j                  d      }| j                  d      }	t        j                  |       }
t        j                  ||	|ft        j                  | j
                        j                  dd      }|
|fS )Nr   r   r&  r  )r   rM   r   r   rQ   rz   r+  )r]  r   rA  r^  r_  r  r  r  rh  ri  rm  rn  s               r:   0meta__scaled_dot_product_flash_attention_for_cpur  R  s     AJ

1IA  'I	

 kk|| i1o  	 r<   c
                    |j                  d      }
|j                  d      }|j                  d      }|j                  d      }|j                  d      }t        j                  |
|||fd|j                  |j                        }t        j                  |
|||fd|j                  |j                        }t        j                  |
|||fd|j                  |j                        }|||fS )Nr   r   r2   r&  r   r&  r   r2   r  )r   rM   empty_permutedrU   rz   )r  r]  r   rA  r  rn  r^  r_  r  r  r  rh  rj  len_qlen_kr  r  r  s                     r:   9meta__scaled_dot_product_flash_attention_for_cpu_backwardr  t  s    & AJ

1Izz!}HJJqMEHHQKE!!	Yx0kk||	F !!	Yx0iizz	F !!	Yx0kk||	F 66!!r<   c                 @   | j                  dd      } |j                  dd      }|j                  dd      }| j                  d      }| j                  d      }	| j                  d      }
|j                  d      }t        j                  ||	|
|| j                  | j
                        }t        j                  j                  r&t        j                  j                         r	 |r|	nd}n|rt        j                  |	dz        dz  nd}t        j                  ||
|ft        j                  | j
                        }|j                  dd      }t        j                  dt        j                  d	      }t        j                  dt        j                  d	      }||||fS )
Nr   r&  r   r  r   r  rD  r@   rw   )r+  r   rM   r   rU   rz   rf  rg  r*  r  r  re  rQ   r   )r]  r   rA  r~  r  r^  r_  r  r  r  rh  Kvr  logsumexp_dimr  rr  rs  s                    r:   ,meta__scaled_dot_product_efficient_attentionr    sD    OOAq!E
--1
COOAq!E

1A

1A

2I	BB
++aIrU\\
RC}}UZZ446	 0Q2D		!b&)B.!	
I}%kk||J --1
C ;;rF;D[[5::f=F
D&((r<   grad_input_maskc                    |j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }t        j                  ||||fd|j                  |j                        }t        j                  ||||fd|j                  |j                        }t        j                  ||||fd|j                  |j                        }d }|~|
d   ry|j                  d      }|dz  dk(  r|n
|dz   |dz  z
  }t        |j                               }||d<   t        j                  ||j                  |j                        }|d	d |f   }||||fS )
Nr   r   r&  r2   r  r  r   r  .)r   rM   r  rU   rz   r   r   )r  r]  r   rA  r~  r  rn  r  r  r^  r  r_  r  r  rh  r  rj  
head_dim_vr  r  r  r  	grad_biaslastDimlastDimAligned	new_sizess                             r:   +meta__scaled_dot_product_efficient_backwardr    s{   ( AJ

1IJJqMEzz!}HAJHHQKE!!	Yx0kk||	F !!	Yx0iizz	F !!	Yz2kk||	F I!3..$$+bLA$57R<'TV,;V)*	&	"KKY__Y5E5E
	 c8G8m,	669,,r<   c                     t        j                  |      }t        j                  |      }t        j                  |      }|||fS r7   r  )r  r]  r   rA  r  rn  r  r  r~  r  r  r  r  r^  r_  r  r  r  r  s                      r:   'meta__scaled_dot_product_cudnn_backwardr    sA    . e$Fc"Fe$F66!!r<   window_size_leftwindow_size_right	seqused_kalibi_slopesc                    || j                  d      n|j                         dz
  }|| j                  d      n|}||j                  d      n|}| j                  d      }| j                  d      }t        j                  |       }|4t        j                  |||ft        j
                  | j                        }nC| j                  d      }t        j                  ||ft        j
                  | j                        }|	ra|dkD  rdnd}t        j                  ||z        }|dk  rd}n|dk  rd}t        j                  ||||f| j                  | j                        }n,t        j                  d| j                  | j                        }d	\  }}t        j                  j                  rkt        j                  j                         rMt        j                  d
t        j                  d      }t        j                  d
t        j                  d      }nLt        j                  dt        j                  d      }t        j                  d
t        j                  d      }|||||fS )Nr   r   r  r   r  rb  rc  rd  NNr@   rw   r&  )r   r   rM   r   r   rQ   rz   r  re  rU   rf  rg  r*  r  r   rV  )r]  r   rA  r  r  r  r  r^  r_  r`  r  r  r  r  r  r  ri  rk  rh  rj  rm  rn  total_qro  rp  rq  rr  rs  s                               r:   meta__flash_attention_forwardr  )  s   4 #,"3A9JQ9NJ*3*;A(1(9!u

2Izz"~H   'IKK$67++<<
	 **Q-KK ELL
	 %]cyy!3k!AB$L3&L[[$6E++<<

 [[%++ellK
 LD&}}UZZ446{{2UZZ?Ruzz&A{{Aell6BRu||FC r<   c                     t        j                  |      }t        j                  |      }t        j                  |      }|||fS r7   r  )r  r]  r   rA  r  rn  r  r  r  r  r^  r_  r  r  r  r  r  
grad_querygrad_key
grad_values                       r:   meta__flash_attention_backwardr  y  sA    0 !!%(J$H!!%(Jx++r<   cu_seqlens_qcu_seqlens_kmax_seqlen_qrp  custom_mask_typecausal_diagonalseqlen_kwindow_sizec                    | j                  d      }| j                  d      }|j                  d      }| j                  d      }|j                  d      }t        j                  ||||| j                  | j                        }||j                  d      dz
  n|}|}||J |}||n|}|
rt        j                  |dz        dz  nd}t        j                  |||ft        j                  | j                        }t        j                  dt        j                  d      }t        j                  dt        j                  d      }||||||fS )	Nr   r   r  r   r  rD  r@   rw   )	r   rM   r   rU   rz   r  re  rQ   r   )r]  r   rA  rp  r  r  r  rp  r^  r  r  r  r  r  r  r  r  r  rh  r  r  logsumexp_batch_dimactual_max_seqlen_qactual_max_seqlen_kr  r  rr  rs  s                               r:   !meta__efficient_attention_forwardr    s9   , 	

1A

1AA

2I	BB
++aIrU\\
RC7C7O,++A.2VW'''**6*B,4F		%*+b0A  	i7kk||J ;;rF;D[[5::f=F
D&*=?RRRr<   bias_requires_gradnum_splits_keyshared_storage_dqdkdvc                    |rt        j                  |j                  d   |j                  d   k(  d        t        j                  |j                  d   |j                  d   k(  d        t        j                  g |j                  dd d|j                  d   |j                  d   |j                  |j
                        }|j                  d	d      }|j                  d	d      }|j                  d	d
      }n?t        j                  |      }t        j                  |      }t        j                  |      }|z|j                  d      }|dz  dk(  r|n
|dz   |dz  z
  }t        |j                               }||d<   t        j                  ||j                  |j
                        }|dd |f   }n!t        j                  d|j
                        }||||fS )Nr   c                       y)Nz,seqlen must match for `shared_storage_dqdkdvr@   r@   r<   r:   r\   z4meta__efficient_attention_backward.<locals>.<lambda>  s    Br<   r2   c                       y)Nz3embedding dim must match for `shared_storage_dqdkdvr@   r@   r<   r:   r\   z4meta__efficient_attention_backward.<locals>.<lambda>  s    Ir<   r   r  r   r  rS  r&  r  .r@   r  )
rM   r^   r   r   rU   rz   r  r   r   r   )r  r]  r   rA  rp  r  r  r  rp  rn  r^  r  r  r  r  r  r  r  chunkr  r  r  r  r  r  r  s                             r:   "meta__efficient_attention_backwardr    s   2 KKNciil*B	
 	KKNciil*I	
 Eekk!BEEEKKOEU[[_E++<<

 \\"a(
<<A&\\"a(
%%e,
##C(%%e,
))B-$+bLA$57R<'TV,;V%	&	"KK	DKKP	c8G8m,	KK5<<8	xY66r<   scale_ascale_bscale_resultuse_fast_accumc                 
    d }t        j                   j                         dk(  xr j                         dk(   fd       t        j                   | j                        xr  |j                         fd       t	               dk(  r%d }	d }
d }t        j                   |	 j                               xs  |        fd	       t        j                   |
j                               xs  |      fd
       t        j                   j                  d      dz  dk(   fd       t        j                  j                  d      dz  dk(  xr j                  d      dz  dk(  fd        j                  \  }j                  d      j                  t         j                  k(  xr j                  t         j                  k(  xs< j                  t         j                  k(  xr j                  t         j                  k(  }j                         dk(  rfj                         dk(  rSt        j                  j                  t         j                  k(  xr j                  t         j                  k(  d        n&|rj                  t         j                  k(  rd}|dz  }nd}d}d } |||      } ||d      dz  }| ||      z  |z  | ||      z  |z  j                         k(  r_j                         k(  rLt        j                  j                         d        t        j                  j                         d        nSt        j                  dfd       n5t        j                  j                  t         j                  k(  xr j                  t         j                  k(  d        t        j                  j                         dk(  xr j                         dk(  fd       j                  d      k(  rtj                  d      dk(  r`j                  d      dk(  rLj                  d      k(  r8t        j                  j                         xr j                         d        nt        j                  dfd       ||n j                  }t        j                   j                  d      j                  d      | j                        S )Nc                     | t         j                  t         j                  t         j                  t         j                  t         j
                  fv S r7   )rM   r  float8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzfloat4_e2m1fn_x2r   s    r:   is_fp8_or_fp4_typez*meta_scaled_mm.<locals>.is_fp8_or_fp4_type  sA    !!!!""
 
 	
r<   r&  c                  L    dj                          d j                          S )Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=r   r|  r   s   r:   r\   z meta_scaled_mm.<locals>.<lambda>  s%    7
|CSTXT\T\T^S_`r<   c                  <    dj                    d j                    S )Nz?Expected both inputs to be fp8 or fp4 types but got self.dtype=z and mat2.dtype=r   r  s   r:   r\   z meta_scaled_mm.<locals>.<lambda>"  s$    QRVR\R\Q]]mnrnxnxmyzr<   r*  c                 ,    | d   | d   kD  xr | d   dk(  S r  r@   r  s    r:   is_row_majorz$meta_scaled_mm.<locals>.is_row_major'  s"    !9vay(;VAY!^;r<   c                 &    | d   dk(  xr | d   dkD  S r  r@   r  s    r:   is_col_majorz$meta_scaled_mm.<locals>.is_col_major*  s    !9>3fQi!m3r<   c                 V    | j                  d      dk(  xs | j                  d      dk(  S r  r   )	tensor_2ds    r:   has_zero_dimz$meta_scaled_mm.<locals>.has_zero_dim-  s)    >>!$)CY^^A->!-CCr<   c                  *    d j                          S )Nz#self must be row_major, got stride r  r   s   r:   r\   z meta_scaled_mm.<locals>.<lambda>2      9$++-Ir<   c                  *    d j                          S )Nz#mat2 must be col_major, got stride r  r|  s   r:   r\   z meta_scaled_mm.<locals>.<lambda>6  r  r<   r   r  r   c                  ,    d j                  d       S )NzBExpected self.size(1) to be divisible by 16, but got self.size(1)=r   r   r   s   r:   r\   z meta_scaled_mm.<locals>.<lambda>:  s    XY]YbYbcdYeXfgr<   c                  "    d j                    S )Nz>Expected both dimensions of mat2 to be divisble by 16 but got r   r  s   r:   r\   z meta_scaled_mm.<locals>.<lambda>>  s    TUYU_U_T`ar<   c                       y)NzNFor tensorwise scaling, both scale_a and scale_b must be float (fp32) tensors.r@   r@   r<   r:   r\   z meta_scaled_mm.<locals>.<lambda>R  s    hr<   rD  rc  c                     | |z   dz
  |z  S rd  r@   r:  s     r:   ceil_divz meta_scaled_mm.<locals>.ceil_divb  s    A	a''r<   r6  c                       y)Nzscale_a must be contiguousr@   r@   r<   r:   r\   z meta_scaled_mm.<locals>.<lambda>u      8r<   c                       y)Nzscale_b must be contiguousr@   r@   r<   r:   r\   z meta_scaled_mm.<locals>.<lambda>y  r  r<   Fc            	      Z    d  dj                          d dj                          d	S )NzTInvalid blockwise scaling configuration. For blockwise scaling, scale_a should have  elements, got z, scale_b should have r  r  )expected_a_sizeexpected_b_sizer  r  s   r:   r\   z meta_scaled_mm.<locals>.<lambda>~  sC    FFUEVVefmfsfsfuev w//>.?w}}N__`br<   c                       y)NzKFor rowwise scaling, both scale_a and scale_b must be float (fp32) tensors.r@   r@   r<   r:   r\   z meta_scaled_mm.<locals>.<lambda>  s    er<   c                  L    d j                         dj                         S )NzLFor non-tensorwise scaling, scale tensors must be 2D, but got scale_a.dim()=z and scale_b.dim()=r   )r  r  s   r:   r\   z meta_scaled_mm.<locals>.<lambda>  s*    gY`YdYdYfXhh|nunynyn{m}~r<   c                       y)Nz@Both scale_a and scale_b must be contiguous for rowwise scaling.r@   r@   r<   r:   r\   z meta_scaled_mm.<locals>.<lambda>  s    ^r<   c                      d  d dj                  d       dj                  d       dj                  d       dj                  d       dS )	Nz}Invalid scaling configuration. For tensorwise scaling, both scales should be scalar. For rowwise scaling, scale_a should be (z, 1), scale_b should be (1, z). Got scale_a.size()=(r   ro   r   z) and scale_b.size()=(rp   r   )r  r>  r  r  s   r:   r\   z meta_scaled_mm.<locals>.<lambda>  sf    CCD#Eabcad e//6||A.?r',,q/AR S//6||A.?r',,q/ARRS	Ur<   r  )rM   r^   r}   rU   r.  r   r   r   float8_e8m0fnur  r   r  r   r   rz   )r   r|  r  r  rp  r  rr  r  r  r  r  r  _kis_blockwise_scalingblock_size_kblock_size_mnr  num_k_blockspadded_num_k_blocks
_out_dtyper  r  r  r>  s   ````                @@@@r:   meta_scaled_mmr    s   
 
LL
a+DHHJ!O` 
LL4::&I+=djj+Iz
 4F"	<	4	D 	'=<+=I	
 	'=<+=I	
 	IIaL2"g	
 	IIaL2"=tyy|b'8A'=a	
 

2IIaL MMU111 6!5!55 
 MMU000 5!4!44 	 ==?aGMMOq$8LL.Q7==EMM3Qh " }} 3 33  "!V!M( $B5L"*<";a"? M ::=PP  M ::=PP 
 ?2MMO6))+8 ))+8
  LL.Q7==EMM3Qe
 LL"9w{{}'9~ Q1$LLOq(LLOq(LLOq( ))+G0E0E0G^ 	 (3J;;tyy|TYYq\DKKXXr<   c                 Z    t        | ||||d       | j                  | j                        S NT)r7  rQ  r   r}   r   r_  r   r  s         r:   meta_scatter_reduce_twor    s)     dCVTJ>>$**%%r<   c                 (    t        | ||||d       | S r  rU  r  s         r:   meta_scatter_reduce__twor    s    dCVTJKr<   c                t    t        j                  d j                         cxk  xr dk  nc  fd        j                         dk(  r0t        j                  |t         j                   j
                        S t        j                   j                  d      |t         j                   j
                        S )Nr   r&  c                  *    d j                          S )Nz@The probabilty distributions dimensions must be 1 or 2, but got r   r  s   r:   r\   z"meta_multinomial.<locals>.<lambda>  s    RSXS\S\S^R_`r<   r   r  )rM   r^   r}   r   r   rz   r   )r  num_samplesreplacementr;  s   `   r:   meta_multinomialr
    s|     
LL	EIIK1` yy{a{{;ejjNN;;

1{%**U\\ r<   c                 "    d}| D ]  }||z  }	 |S rd  r@   )vsr  vs      r:   multiply_integersr    s    	A	Q Hr<   c                 L    t        j                  t              k(  fd       dz   t        j                  t               k(   fd       t        j                  t        d  dd  D              xr t        d D               fd        d d \  }}||gS )Nc                  &    d  dt               S )Nz%It is expected output_size equals to , but got size r  )num_spatial_dimsr  s   r:   r\   z'upsample_common_check.<locals>.<lambda>  s    78H7IY\]hYiXjkr<   r&  c                  &    d  dt               S )Nz$It is expected input_size equals to r  r  )expected_input_dimsro  s   r:   r\   z'upsample_common_check.<locals>.<lambda>  s    67J6K?[^_i[jZklr<   c              3   &   K   | ]	  }|d kD    ywr  r@   ri   r   s     r:   rk   z(upsample_common_check.<locals>.<genexpr>  s     *>aAE>r  c              3   &   K   | ]	  }|d kD    ywr  r@   r  s     r:   rk   z(upsample_common_check.<locals>.<genexpr>  s     2N+Q1q5+r  c                      d  d S )NzDInput and output sizes should be greater than 0, but got input size z and output size r@   )ro  r  s   r:   r\   z'upsample_common_check.<locals>.<lambda>  s      \!2;-Ar<   )rM   r^   r   r  )ro  r  r  r  channelsr  s   ```  @r:   upsample_common_checkr    s    	LLK,,k +Q.	LLJ..l
 
LL*:ab>**Ns2N+2N/N	A ""1~FHH+{++r<   c                 4    t        j                   j                         dk7  xs t         j	                         dd         fd       t         j	                         |d      } j                  |      j                  t        j                               S )Nr   r   c                  *    d j                          S )Nz>Non-empty 3D data tensor expected but got a tensor with sizes r   r  s   r:   r\   z$upsample_nearest1d.<locals>.<lambda>      PQVQ[Q[Q]P^_r<   r  r   
rM   r^   r   r  r   r  r   r\  rE   r"   )r  r  scalesfull_output_sizes   `   r:   upsample_nearest1dr"         
LLA/

QR0@A_ -

kA ??+,//11%8 0  r<   c                     t        j                   j                         dk7  xs t         j	                         dd         fd       t         j	                         |d      } j                  |      }t        j                         } j                  \  }}}} j                  j                  dk(  r|dk  rt         j                  }|j                  |      }|S )	Nr   r   c                  *    d j                          S Nz>Non-empty 4D data tensor expected but got a tensor with sizes r   r  s   r:   r\   z$upsample_nearest2d.<locals>.<lambda>  r  r<   r&  r  r*  r6  r   )rM   r^   r   r  r   r  r   rE   r"   r   rz   rq   r   
contiguous)	r  r  scales_hscales_wr!  r5  r   rH   
n_channelss	   `        r:   upsample_nearest2dr+    s     
LLA/

QR0@A_ -

kA __-.F //6M  ++Az1a||F"zA~//];FMr<   r  ro  r(  r)  c                 X    t        ||d      t        j                   j                  dk(   fd       t	        d      D ]2  t        j                   j                           k(   fd       4  j                  |      j                  t        j                               S )Nr&  r  r6  c                  "    d j                    S NzFExpected grad_output to be a tensor of dimension 4 but got: dimension r2  r  s   r:   r\   z-upsample_nearest2d_backward.<locals>.<lambda>      XYdYiYiXjkr<   c            
      D    d d     d dj                         S )NzCExpected grad_output to have the same shape as output; output.size() = z but got grad_output.size(r   r!  r*  r  s   r:   r\   z-upsample_nearest2d_backward.<locals>.<lambda>#  s9      !s$'7':&;,QCtK4D4DQ4G3HJr<   r   )
r  rM   r^   r   r   r   r   r\  rE   r"   )r*  r  ro  r(  r)  r!  r  s   `    @@r:   upsample_nearest2d_backwardr3    s     -K! 
LLAk 1XQ#3A#66	
    ,//11+> 0  r<   c                 4    t        j                   j                         dk7  xs t         j	                         dd         fd       t         j	                         |d      } j                  |      j                  t        j                               S )Nr   r   c                  *    d j                          S )Nz>Non-empty 5D data tensor expected but got a tensor with sizes r   r  s   r:   r\   z$upsample_nearest3d.<locals>.<lambda>5  r  r<   r2   r  r   r  )r  r  scales_dr(  r)  r!  s   `     r:   upsample_nearest3dr7  /  r#  r<   c                    t        j                  |       t        j                  | t         j                        }}||t        |t              sJ t        |t              sJ |j
                  }|j                         }	t        ||      }t        ||      }|j                  ||	       |j                  ||	       t        ||       t        ||       ||fS ||fS )Nr   )r  r  )
rM   r   r   rg   r#   r   r   r%   r  r'   )
r   stabler}   
descendingr   r   r  r  r   
out_strides
             r:   	meta_sortr<  ?  s     D!5#3#3D#LqAg1&*---':... GG	XXZ
"695#GY79j1Iz2F3G4wa4Kr<   c           	          t        j                   j                  dk(   fd       t        j                   j                  j                  k(   fd        j	                  d      t        j                  j                  dk(  fd       t        j                  j                         k(  fd       t        j                  j                  j                  k(  fd       t        j                  j                  dk(  fd        j	                  d	      z  z  t        j                  j                         k(   fd
       t        j                  t         fdfD              d        y )Nr&  c                  "     j                    dS Nz != 2r2  )input_gatess   r:   r\   z%rnn_cell_checkSizes.<locals>.<lambda>b      ;3C3C2DE0Jr<   c                  :    j                    d j                    S N != r   )hidden_gatesr@  s   r:   r\   z%rnn_cell_checkSizes.<locals>.<lambda>e  s    ;$$%T,*<*<)=>r<   r   c                  "     j                    dS )Nz != 1r2  )
input_biass   r:   r\   z%rnn_cell_checkSizes.<locals>.<lambda>i  s    joo5Fe3Lr<   c                  .    j                          d  S rC  r  )
gates_sizerG  s   r:   r\   z%rnn_cell_checkSizes.<locals>.<lambda>l  s    z'')*$zl;r<   c                  :    j                    d j                    S rC  r   )hidden_biasrG  s   r:   r\   z%rnn_cell_checkSizes.<locals>.<lambda>p  s    z''([->->,?@r<   c                  "     j                    dS r?  r2  )prev_hiddens   r:   r\   z%rnn_cell_checkSizes.<locals>.<lambda>r  rA  r<   r   c            
      `    j                          dj                  d       d d d  d
S )NrD  r   z * z // z (aka rp   )r   r   )expected_prev_hidden_numelfactorrI  r@  rM  s   r:   r\   z%rnn_cell_checkSizes.<locals>.<lambda>v  s@    ;$$&'tK,<,<Q,?+@J<tTZS[[ab|a}}~r<   c              3   P   K   | ]  }|j                   j                   k(    y wr7   r  )ri   rJ   r@  s     r:   rk   z&rnn_cell_checkSizes.<locals>.<genexpr>y  s(      
I HH***Is   #&c                       y)Nz%expected all inputs to be same devicer@   r@   r<   r:   r\   z%rnn_cell_checkSizes.<locals>.<lambda>}  s    7r<   )rM   r^   r   r   r   r   r  )r@  rE  rG  rK  rP  rM  rO  rI  s   ``````@@r:   rnn_cell_checkSizesrS  Z  s@    
LL!!Q&(JK	LL\///> !!!$JZ__)+LM*,;	
 	 1 11@	
 
LL!!Q&(JK!,!1!1!!4z!AV!K	LL99 
LL 
"J[I
 	
 	8r<   c                 
   t        | |||d|       t        j                  | t        j                        }t        j                  |t        j                        }t        j                  |t        j                        }|||fS )Nr6  r   )rS  rM   r   r   )r@  rE  cxrG  rK  	workspacehycys           r:   _thnn_fused_lstm_cell_metarY    sk     \:{ArR  E<S<STI			"E,C,C	DB			"E,C,C	DBIr<   c                 b   t        |      dk7  }|r t        |      }|d   }| j                  d   }nB|
r| j                  d   n| j                  d   }|
r| j                  d   n| j                  d   }d}|rdnd}|dk7  r|n|}|r|||z  g}n|
r||||z  gn||||z  g}| j                  |      }|	|z  ||g}|"t        j                  d| j
                        }n|j                  |      }|j                  |	|z  ||g      }|rdnd}| j                  |t        j                        }|||||fS )Nr   r   r   r&  r  r   )r   r   r   rM   r   rz   r  )r  rn  weight_stride0
weight_bufhxrU  r  hidden_size	proj_size
num_layersbatch_firstdropouttrainbidirectionalbatch_sizesdropout_stateis_input_packed
seq_length
mini_batchbatch_sizes_sumnum_directionsout_sizer   r5  
cell_shaperX  rW  reserve_shapereserves                                r:   
_cudnn_rnnrp    sS   & +&!+O%
 ^
++a.'2U[[^A
'2U[[^A
'QQN%NyH$h&?@	  X%>?j(^*CD 	
 __Y'F~-z;GJ	z[[5<<0\\*%	zN2JI	JB AAMoom5;;o?G2r7J..r<   c                 (   |r| j                   d   n| j                   d   }|r| j                   d   n| j                   d   }|
}|r|||gn|||g}| j                  |      }|"t        j                  d| j                        }n|j                  |j                         }|"t        j                  d| j                        }n|j                  |j                         }t        j                  d| j                  t        j
                        }||||fS )Nr   r   r  r   )r   r   rM   r   rz   r  )r  w0w1w2w3hx_cx_r   re  r  r^  r`  
has_biasesrd  ra  rc  rh  ri  output_chanelsr   r5  rW  rX  rV  s                           r:   mkldnn_rnn_layerrz    s    & $/QEKKNJ#.QEKKNJ N  
Z0*n5 
 __Y'F
{[[5<<0]]399%
{[[5<<0]]399%Aell%++FI2r9$$r<   c                     | j                   dk(  r%t        j                  dk(  xs dk(  fd       y t        j                  | j                        dk7  fd       y )Nr   r   c                       d  S )Nz4: Expected reduction dim -1 or 0 for scalar but got r@   r}   r  s   r:   r\   z'zero_numel_check_dims.<locals>.<lambda>  s    wiSTWSXYr<   c                       d  dS )Nz: Expected reduction dim z to have non-zero size.r@   r}  s   r:   r\   z'zero_numel_check_dims.<locals>.<lambda>  s    wi8=TUr<   )r   rM   r   r   )r   r}   r  s    ``r:   zero_numel_check_dimsr    sR    yyA~1H!r	Y	

 	IIcNaU	
r<   c                      |(t        ||j                               }t        ||        y t        j                  |j                         dk7   fd       y )Nr   c                        dS )Nz@: Expected reduction dim to be specified for input.numel() == 0.r@   rB  s   r:   r\   z%check_argmax_argmin.<locals>.<lambda>  s    tf\]r<   )r   r}   r  rM   r^   r   )r  r   r}   s   `  r:   check_argmax_argminr    sC    
S$((*-dC.JJLA]	
r<   c                     t        d| |       t        j                  | j                  ||fnd       }t	        | ||      }| j                  |t        j                        S )Nargmaxr   )r  rE   r  r   r  r   rM   r   )r   r}   r  r  r   s        r:   argmax_argmin_metar  	  sQ    $,

coSF4PD$T49E>>%u{{>33r<   c                 |    |t         j                  k(  rt         j                  }t        j                  d||||      S )Nr@   r@  )rM   jaggedr  r   )r   rU   ry   rz   r{   s        r:   scalar_tensorr    s5    
 ;;
%v* r<   c                    t        || j                         d      }| j                         dk(  rdn| j                  |      }t        j                  |       t        j
                  ||k  d        t        | j                        }t        |      dkD  r|||<   | j                  |      | j                  |t        j                        fS )NTr  r   r   c                       y)Nzk not in range for dimensionr@   r@   r<   r:   r\   ztopk_meta.<locals>.<lambda>#  s    )Gr<   r   )r   r}   r   rM   r  r^   r   r   r   r   r   )r   r  r}   largestry  	sliceSizetopKSizes          r:   	topk_metar    s     dhhjd
;CXXZ1_$))C.I		LLi!GHDJJH
8}q>>(#T^^HEKK^%PPPr<   c                     |	|J d       |j                         }| j                         }	t        j                  ||	j                  |	j                  |	j
                        S )Nz;segment_reduce(): Either lengths or offsets must be defined)rU   rz   ry   )r'  rM   r   rU   rz   ry   )
ro  r5  r  r   r  r  r  r  data_contiggrad_contigs
             r:   meta__segment_reduce_backwardr  +  sj    
 '"5 E5 //#K//#K!!!!	 r<   c                    ddl m} t        | j                         d      | j                         dkD  r| j	                        nd}t        j                   ||dk\  ||k        fd       t        | j                  d  | j                  dz   d  z         }|r%| j                         dkD  r|j                  d       | j                  |      | j                  |t
        j                        fS )Nr   )sym_andTr  r   c                      d  S )Nz9kthvalue(): selected number k out of range for dimension r@   r   s   r:   r\   zkthvalue_meta.<locals>.<lambda>F  s    KC5Qr<   r   )r   r  r   r}   r   rM   r^   r   r   re  r   r   )r   r  r}   r  r  dimSizer   s     `    r:   kthvalue_metar  =  s     >
dhhjd
;C $
QdiinAG	LLQW%Q
 DS!DJJsQwy$99:E488:>S!>>% $..ekk."JJJr<   c                    | | n|}t        j                  |j                         dk(  d        |j                         }| (t        j                  | j                         |k(  d        |(t        j                  |j                         |k(  d        t        j                  |j                         |k(  d        t        j                  |j                         |k(  d        t        j                  |j                         dk(  d        t        j                  |j	                         |d   |d	   z  d
z  k(  d        y )Nr&  c                       yN r@   r@   r<   r:   r\   z(checkLSTMBackwardSizes.<locals>.<lambda>U  s    "r<   c                       yr  r@   r@   r<   r:   r\   z(checkLSTMBackwardSizes.<locals>.<lambda>X      r<   c                       yr  r@   r@   r<   r:   r\   z(checkLSTMBackwardSizes.<locals>.<lambda>Z  r  r<   c                       yr  r@   r@   r<   r:   r\   z(checkLSTMBackwardSizes.<locals>.<lambda>[      r<   c                       yr  r@   r@   r<   r:   r\   z(checkLSTMBackwardSizes.<locals>.<lambda>\  r  r<   c                       yr  r@   r@   r<   r:   r\   z(checkLSTMBackwardSizes.<locals>.<lambda>]  s    rr<   r   r   r6  c                       yr  r@   r@   r<   r:   r\   z(checkLSTMBackwardSizes.<locals>.<lambda>^  s    Rr<   )rM   r^   r}   r   r   )grad_hygrad_cyrU  rX  rV  defined_gradexp_sizes          r:   checkLSTMBackwardSizesr  S  s    %17wL	LL!!#q(*5  "HW\\^x/<W\\^x/<	LLh&
3	LLh&
3	LLA%z2	LL"hqkHQK&?!&CCZPr<   c                     | |yt        | ||||       t        j                  |t              }t        j                  |t              }|r|j	                  dd      nd }|||fS )NNNNr   r   F)r  )r  rM   r   legacy_contiguous_memory_formatr  )	r  r  rU  rX  rV  has_bias
grad_gatesgrad_cxr  s	            r:   #_thnn_fused_lstm_cell_backward_implr  b  sl    7?7GRY?!!!@J r1PQG4<
q%0$Iw	))r<   c                    d }d }d }|d   r|j                  | j                               }|d   s|d   rQ|j                  |j                  d      | j                  d      f      }|j                  |j                  d            }|||fS )Nr   r   r&  r   r  )r  r   r  r  r.  grad_weightr  s          r:   linear_backwardr  p  s    JKI1~!++FKKM:
1~Q",,l.?.?.CV[[QS_-UV **<+<+<R+@A	Y//r<   c                     t         j                        dkD  r j                  d   ||z  z  dk(  sJ d j                   d|        d  fd} j                  d   ||z  z  } j                  d   |z  } j                  d	   |z  }g  j                  d d |||} j                  |      }|j                   |       
      }|S )Nr&  rS  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 b    t         j                  j                  |       t         j                  k(  S r7   r  r  s    r:   r  z,meta_pixel_shuffle.<locals>.is_channels_last  s$    ""88=ATATTTr<   c                  2           r.t              dk(  rt        j                  S t        j                  S j	                  t        j                        rt        j                  S j	                  t        j
                        rt        j
                  S y r  )r.  rM   r   r  r   r  )r  r   s   r:   r  z.meta_pixel_shuffle.<locals>.pick_memory_format  s|    D!4 F*...***e.E.EF***e.C.CD((( Er<   r  r   r   )r   r   r   r\  )	r   upscale_factorr  r  HrWrr   r  r  s	   `       @r:   meta_pixel_shuffler  }  s     	DJJ!

2.>2Q RVW W 2$**=TUcTdeW
U	) 	

2>N:;A	B.	(B	B.	(B-$**Sb/-1-b-"-I
..
#C
&&13&
4CJr<   c                 X   | j                  | j                        }|j                  |j                        }|j                  |j                        }|j                  |j                        }|j                  |j                        }|j                  |j                        }|||||||fS r7   r  )r  weight0weight1weight2weight3rv  cx_tmpr5  hy_cy_grad_output_r_optgrad_hy_r_optgrad_cy_r_optr   r  r^  r`  rx  rc  rd  re  ra  rV  diff_xdiff_hxdiff_cxdiff_w1diff_w2diff_bs                                r:   mkldnn_rnn_layer_backwardr    s    4 __U[[)FmmCII&Gv||,G.G.Gw}}-F7GVVWgEEr<   )	out_int32r  c                    t        j                  | |rt         j                  nt         j                  t         j                        S )NrU   r   )rM   r   rv  r   r   )r   
boundariesr  r  s       r:   meta_bucketizer    s2     &ekkEKK-- r<   c                     dt               dk(  r't        j                   j                          fd       t               dk(  r% j                         rt	        j
                  d       t        j                  t        t              fd       t        j                  dkD  fd       t        j                  t        t              fd	       t        j                  t        t              fd
       t        j                  k\  d        t        j                   j                   j                        S )Nzhistc()r  c                  $    d j                    dS )Nz%"histogram_cpu" not implemented for 'r  r   r  s   r:   r\   zmeta_histc.<locals>.<lambda>  s    =ekk]!Lr<   r*  z%_histc_cuda with floating point inputc                  $     dt                S )Nz#: argument 'bins' must be int, not r  binsr  s   r:   r\   zmeta_histc.<locals>.<lambda>  s    7)>tDzlKr<   r   c                       d  S )Nz: bins must be > 0, but got r@   r  s   r:   r\   zmeta_histc.<locals>.<lambda>  s    gY.J4&#Qr<   c                  $      dt               S )Nz%: argument 'min' must be Number, not r  )r  r/  s   r:   r\   zmeta_histc.<locals>.<lambda>      7)@cLr<   c                  $      dt               S )Nz%: argument 'max' must be Number, not r  )r  r   s   r:   r\   zmeta_histc.<locals>.<lambda>  r  r<   c                       y)Nz&{fn_name}: max must be larger than minr@   r@   r<   r:   r\   zmeta_histc.<locals>.<lambda>  s    %Mr<   r   )r.  rM   r^   r-  rE   r  rg   r   r!   r   rz   rU   )r  r  r/  r   r  s   ````@r:   
meta_histcr    s     G5U"##%L	
 5V#(?(?(A%%&MN	LL4!K 
LLQR	LL3L 
LL3L 
LLMN;;tELLDDr<   c                 B    t         j                         |d      }t        j                   j	                         dk7  xs# t        d  j                         dd  D               fd        j                  |      j                  t        j                               S )Nr&  r  r   c              3   &   K   | ]	  }|d kD    ywr  r@   )ri   r   s     r:   rk   z,meta_upsample_bimode2d_aa.<locals>.<genexpr>  s     !H7Gt$(7Gr  r   c                  *    d j                          S r&  r   r  s   r:   r\   z+meta_upsample_bimode2d_aa.<locals>.<lambda>  r  r<   r   )
r  r   rM   r^   r   r  r   r\  rE   r"   )r  r  r  r(  r)  r!  s   `     r:   meta_upsample_bimode2d_aar    s     -

kA 
LLHc!Huzz|AB7G!HH_ ??+,//11%8 0  r<   c                 T    t        ||d      t        j                   j                  dk(   fd       t	        d      D ]0  t        j                   j
                        k(   fd       2  j                  |      j                  t        j                               S )Nr&  r  r6  c                  "    d j                    S r.  r2  r  s   r:   r\   z4meta_upsample_bimode2d_aa_backward.<locals>.<lambda>
  r/  r<   c            
      D    d d     d dj                         S )NzD
Expected grad_output to have the same shape as output; output.size(r1  z
but got grad_output_size(r   r2  s   r:   r\   z4meta_upsample_bimode2d_aa_backward.<locals>.<lambda>  s>     DDE3dK[\]K^J_ `D!1!1!!4 59r<   r   )
r  rM   r^   r   r   r   r   r\  rE   r"   )r*  r  ro  r  r(  r)  r!  r  s   `     @@r:   "meta_upsample_bimode2d_aa_backwardr    s     -K! 
LLAk 1Xa $4Q$779	
    ,//11+> 0  r<   c                 P   t        j                  |j                         dk(  d        t        j                  |j                         dk(  d        t        j                  |j                  j                  d        t        j                  |j                  j                  d        y )Nr   c                       y)Nz%found_inf must be a 1-element tensor.r@   r@   r<   r:   r\   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>      (Or<   c                       y)Nz%inv_scale must be a 1-element tensor.r@   r@   r<   r:   r\   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  r  r<   c                       y)Nz!found_inf must be a float tensor.r@   r@   r<   r:   r\   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>#      3r<   c                       y)Nz!inv_scale must be a float tensor.r@   r@   r<   r:   r\   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>'  r  r<   )rM   r^   r   rU   r-  )r   r  	inv_scales      r:   *_amp_foreach_non_finite_check_and_unscale_r    s|    	LLQ O 
LLQ O 
LL))3 
LL))3r<   c                 V    t        | j                               }| j                  |      S r7   )r   r   r   )r   nanposinfneginfr  s        r:   
nan_to_numr  ,  s#     tyy{#K>>+&&r<   c                    | j                   t        j                  t        j                  t        j                  t        j
                  hvsJ d| j                    d       | j                  }t        ||      }t        ||      }||k(  r| S t        | j                               }t        | j                               }||   ||   c||<   ||<   ||   ||   c||<   ||<   | j                  ||       | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)ry   rM   r  
sparse_cscr  
sparse_bscr   r   r   r   r   r  )r   dim0r  ndimsr   r   s         r:   r  r  3  s    ;;	   IU\]  IIE$&D$&Dt|		D$++- F!'vd|F4L&,!$ZdDJT
T6"Kr<   c                    | j                   }| j                  r8| j                         }| j                         }|dk  r|dk(  s,J d| d| d       | j	                         dk  sJ d| d       t        | d|dk  rd      S d      S )	Nr&  r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is r1  r   )r   r  r  r  r}   r  )r   r  r  r  s       r:   t_r  P  s    IIE~~__&
NN$	Q9> 	
!l,yk9JL	
1
 xxzQ 	
DUG1M	
 dAEAIq55155r<   )r  r  sidesorterc                \    t        j                  t         j                        dk  xs  j                  d d j                  d d k(   fd       t        j                  d u xs  j                  j                  k(   fd       t        j                  |dk7  xs | d       |rt         j                  nt         j
                  }t        t         j                        r&t        j                  |t         j                        S t        j                  d| j                  	      S )
Nr   r   c                  `    dt        j                         dt         j                         S )Nztorch.searchsorted(): boundaries tensor should be 1 dimension or the first N-1 dimensions of boundaries tensor and input value tensor must match, but we got boundaries tensor z and input value tensor r   r   )r   sorted_sequences   r:   r\   z#meta_searchsorted.<locals>.<lambda>s  s3    3378M8M3N2O P""&tzz"2!35r<   c                  l    dt         j                         dt        j                         S g  S )Nz[torch.searchsorted(): boundary and sorter must have the same size, but got boundary tensor z and got sorter tensor r  )r  r  s   r:   r\   z#meta_searchsorted.<locals>.<lambda>~  sB    ##'(=(=#>"??V%+%7tFLL!@B=?@Br<   r  zetorch.searchsorted(): side and right can't be set to opposites, got side of left while right was Truer  r@   r  )rM   r^   r   r   rv  r   rg   r   r   r   r   rz   )r  r   r  r  r  r  rU   s   ``   ` r:   meta_searchsortedr	  c  s     
LLO!!"a' 	9  "%CR8	
	 
LL$?///6<<?	
 
LL#e)	$ %EKK%++E$%U-D-D
 	
 {{2U?3I3IJJr<   c                      t        j                   t         j                  t         j                  t         j                  fv fd       y )Nc                      d  S )Nz/Unsupported input type encountered for isin(): r@   r   s   r:   r\   z3_check_for_unsupported_isin_dtype.<locals>.<lambda>  s    A%Ir<   )rM   r^   r   
complex128	complex64r   s   `r:   !_check_for_unsupported_isin_dtyper	    s/    	LLejj%"2"2EOODDIr<   c                 J    | j                  || j                  d      f      }|S )Nr   r  )r*  r   num_weightsr  r  r  s         r:   meta_embedding_dense_backwardr		    s*     ''k6F6Fr6J(KLKr<   c                 j    |	rt         j                  | ||||||||
|
      S t        | ||||||||
|
      S r7   )r/   _embedding_bag_sparse_backward!meta_embedding_bag_dense_backward)ro  r   r  r  r  maximum_indicesr	  r  r  r  r  r  s               r:   meta_embedding_bag_backwardr	    se     22
 	
 1
 	
r<   c
                 N    t        j                   j                  t         j                  t         j                  t         j
                  t         j                  fv  fd       |t        k(  rt        j                  |d u        j                  | j                  d      f      }
|
S )Nc                  "    d j                    S )Nz$Unsupported input type encountered: r   )ro  s   r:   r\   z3meta_embedding_bag_dense_backward.<locals>.<lambda>  s    6tzzlCr<   r   )
rM   r^   rU   r  r  r  float64r  r   r   )ro  r   r  r  r	  r	  r  r  r  r  index_grad_weights   `          r:   r	  r	    sv     
LL

u}}ennemmU]]SSC x_D01TYYq\'BCr<   c                    | j                  d      }t        j                  |t        k(  d       t        j                  | j	                         dk(         t        j                  |j	                         dk(         |j                  d      }t        j                  |j	                         dk(         t        j                  |j                  d      |k(         | j                  |f      }	|	S )Nr   zHembedding_bag_backward: per_sample_weights only supported for mode='sum'r&  r   )r   rM   r^   r  r}   r   )
ro  rn  r   r  r  r  r  embedding_featuresr  r5  s
             r:   .meta_embedding_bag_per_sample_weights_backwardr	    s     1	LLR 
LLq!	LL!#$,,q/K	LL"#	LLQ#556^^[N+FMr<   )assume_uniqueinvertc                   t        j                  t        | t              xs t        |t              d        t        | t              s!t        j                  | |j
                        } t        |t              s!t        j                  || j
                        }t        | j                         t        |j                         t        j                  | t         j                        S )Nc                       y)Nz<At least one of elements and test_elements must be a Tensor.r@   r@   r<   r:   r\   zmeta_isin.<locals>.<lambda>  r  r<   r  r   )
rM   r^   rg   r   rf  rz   r	  rU   r   r   )elementstest_elementsr	  r	  s       r:   	meta_isinr	    s     
LL8V$I
=&(IN h'<<1E1EFmV,]8??K%hnn5%m&9&9:HEJJ77r<   r>  c                     t        j                  | dk\  d        t        |t        j                        \  }}t        j
                  ||      S )Nr   c                       y)Nz,polygamma(n, x) does not support negative n.r@   r@   r<   r:   r\   z meta_polygamma.<locals>.<lambda>  s    !Or<   r  r   )rM   r^   r   r   r  r   )r>  r   rH   rI   s       r:   meta_polygammar	    sF     
LLaOP(;HHOA| D55r<   c                     t        d      )Nz.Tensor.item() cannot be called on meta tensors)r[  r   s    r:   meta_local_scalar_denser!	    s    
G
HHr<   c                 ,    t        j                  |       S r7   r  r   s    r:   silur#	  $  r  r<   c                 l    t        | t        j                        \  }}t        j                  | |      S r  )r   r   r  rM   r   )r   rH   rI   s      r:   sigmoidr%	  *  s3     );HHOA| D55r<   c                    | j                         dk(  }|j                         dk(  }|r|r4|j                  d      | j                  d      |j                  d      g}nt        j                  |j                  d      |j                  d      k(  d       | j                  d      |j                  d      g}n|rZt        j                  |j                  d      | j                  d      k(  d       | j                  d      |j                  d      g}nit        j                  | j                  d      |j                  d      k(  d       | j                  d      | j                  d      |j                  d      g}|xs | j                  }d|j
                  z  }|d   |z   dz
  |z  |z  }||k(  r|d   |z  |dg}	n|dg}	t        j                  ||	|| j                        }
|
S )	Nr&  r   r   z matrix batch sizes have to matchr   zbatched dimension has to matchr  r  )r}   r   rM   r^   rU   itemsizerT  rz   )rz  r|  offsrr  
mat1_is_2d
mat2_is_2drl  	alignmentsize_paddedr;  r  s              r:    _create_grouped_mm_output_tensorr-	  4  s   qJqJ		!diilDIIaLAHLL		!		!,.P 		!diim4HLL		!		!,.P 		!diil3H LL1157WX		!diilDIIbMBH'TZZIi(((IB<)+a/I=	IKZqkK/a@
!1%



h
)DKK
XCJr<   mat_amat_br(	  c	                 L    t        j                  |d u |d u k(  d        |d uxr |d u}	|	rUt        j                   j                  t         j                  k(  xr j                  t         j                  k(   fd       nTt        j                   j                  t         j                  k(  xr j                  t         j                  k(   fd       t        j                   j                         dv xr j                         dv  fd        j                         dk(  }
j                         dk(  }|	rDd }d }t        j                   |        fd	       t        j                   |      fd
       d } |d         |d       ||t        j                  |j                  t         j                  k(  xr |j                  t         j                  k(  d        dd}|
r|rj                  d   nd} |d| d|        |d|d|       t        j                  |d u d        |
s|r}t        j                  d u fd       xt        j                  j                         dk(  fd       t        j                  j                  t         j                  k(  fd       nt        j                  d u d        t        j                  |d u d        t        j                  |d u xs |t         j                  k(  d        t         |      S )Nc                       y)Nz,Either both scale factors are given, or noner@   r@   r<   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>d  s    >r<   c                  >    d j                    dj                    dS )Nz5Expected inputs of E4M3 FP8 type but got mat_a.dtype= and mat_b.dtype=r  r   r.	  r/	  s   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>o  s#    KEKK=Xijojujuivvwxr<   c                  >    d j                    dj                    dS )Nz1Expected inputs of BF16 type but got mat_a.dtype=r3	  r  r   r4	  s   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>t  s#    G}Tefkfqfqerrstr<   )r&  r2   c                  L    d j                          dj                          S )Nz3Multiplicands must be 2D or 3D but got mat_a.dim()=z and mat_b.dim()=r   r4	  s   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>y  s%    Eeiik]Rcdidmdmdocpqr<   r&  c                 F    | j                         }|d   dkD  xr |d   dk(  S Nr  r   r   r  mat
mat_strides     r:   r  z-_meta_grouped_mm_common.<locals>.is_row_major  s*    Jb>A%=*R.A*==r<   c                 F    | j                         }|d   dk(  xr |d   dkD  S r8	  r  r9	  s     r:   r  z-_meta_grouped_mm_common.<locals>.is_col_major  s*    Jb>Q&=:b>A+==r<   c                  0    d j                         dd   S )NzNExpected mat_a tensor to be row major in the last two dimensions, got strides r  r  )r.	  s   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  s!    dejeqeqestvtwexdyzr<   c                  0    d j                         dd   S )NzQExpected mat_b tensor to be column major in the last two dimensions, got strides r  r  )r/	  s   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  s!    ghmhththvwywzh{g|}r<   c                     j                         dz
  dj                         z  }j                         dz
     dk(  rG   t        dj                  dz
           k\  r%t        j                     |z  dk(   fd       y    dk(  rJdz
     t        dj                           k\  r(t        j                  dz
     |z  dk(   fd       y t        j                  dfd       y )Nr   r  r   c                  "    d d  d     dS )Nr   stride along % dim to be multiple of 16 bytes, got r  r@   end_dimmat_namer;	  s   r:   r\   zF_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>  s'    )H:^G9Dijtu|j}i~~  Ar<   c                  .    d d dz
   d dz
      dS )Nr  rA	  r   rB	  r  r@   rC	  s   r:   r\   zF_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>  sI    )H:^GaK=Hmnx  zA  DE  zE  oF  nG  GH  Ir<   Fc                  *    d d j                    dS )NzInvalid strides/sizes, got z for strides and z for sizes.r   r9	  s   r:   r\   zF_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>  s    5j\ARSVS\S\R]]hir<   )r}   element_sizer   r   r   rM   r^   )rE	  r:	  r+	  rD	  r;	  s   `` @@r:   check_valid_stridesz4_meta_grouped_mm_common.<locals>.check_valid_strides  s    '')a-#**,,	ZZ\
gk"a'Jw,?3syy1%D
 -
 LL7#i/14 A  A%*Wq[*ASsyy!F
 +
 LL7Q;')3q8 I
 LLir<   r.	  r/	  c                       y)NzBoth scale_a and scale_b must be float (fp32) tensors, but got scale_a.dtype={scale_a.dtype} and scale_b.dtype={scale_b.dtype}.r@   r@   r<   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  s      Vr<   r   c                     j                         dk(  rt        j                  j                         dk(   fd       t        j                  j                          fd       t        j                  j                  d   j                     z  k(   fd       y t        j                  j                         dk(   fd       t        j                  j                  d      dk(   fd       t        j                  j                  d   j                  d   k(   fd	       t        j                  j                  d   j                  dz      k(   fd
       y )Nr&  r   c                  2    d d j                          dS )Nr  z to be 1D tensor, but got 	D tensor.r   r  
scale_names   r:   r\   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>      i
|3Meiik]Zcdr<   c                      d  dS )Nr  z to be contiguous.r@   rO	  s   r:   r\   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>  s    i
|3EFr<   r   c                  V    d d j                      z   dj                   d    dS )Nr  z	 to have r  r   z
 elements.r   )r:	  r  scale_multiplierrO	  
scaled_dims   r:   r\   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>  sU    i
|9SYYz=RUe=e<ffuv{  wB  wB  CD  wE  vF  FP  Qr<   c                  2    d d j                          dS )Nr  z to be 2D tensor, but got rM	  r   rN	  s   r:   r\   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>  rP	  r<   c                      d  dS )Nr  z( to be contiguous in the last dimension.r@   rR	  s   r:   r\   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>  s    i
|3[\r<   c                  P    d d j                   d    dj                   d    dS )Nr  z batch dimension to be r   , got r  r   )r:	  r  rO	  s   r:   r\   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>  s4    i
|3J399UV<.X^_d_j_jkl_m^nnopr<   c                  V    d d j                   dz       dj                   d    dS )Nr  z non-batch dimension to be r   rY	  r  r   )r:	  r  rO	  rU	  s   r:   r\   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>  sC    i
|3NsyyYZ]gYgOhNiiopup{p{|}p~o  @A  Br<   )r}   rM   r^   r   r   r   )rO	  r  r:	  rU	  rT	  s   `````r:   check_scalez,_meta_grouped_mm_common.<locals>.check_scale  s   wwyA~IIK1$d '')F KKNcii
&;>N&NN Q
 IIK1$d LLOq(\ KKNciil2p KKNciiJ&?? Br<   r   r  r  c                       y)Nz:Scale result tensor provided, but it is not supported yet.r@   r@   r<   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  rd   r<   c                  N    d j                          dj                          dS )Nz/Offsets tensor not provided, but is needed for zD/zD multiplicand layouts.r   r4	  s   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  s(    Eeiik]RTUZU^U^U`Taaxyr<   c                  ,    d j                          dS )Nz.Offsets tensor must be 1D, but got offs.dim()=r  r   r(	  s   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  s    HTUVr<   c                  $    d j                    dS )Nz7Offsets tensor must be integer (int32) tensor, but got r  r   r_	  s   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  s    QRVR\R\Q]]^_r<   c                       y)NzJOffsets tensor provided, but is not needed for 3D/3D multiplicand layouts.r@   r@   r<   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  s    `r<   c                       y)Nz2Bias tensor provided, but it is not supported yet.r@   r@   r<   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  s    Dr<   c                       y)Nz4If output dtype provided, it must be torch.bfloat16.r@   r@   r<   r:   r\   z)_meta_grouped_mm_common.<locals>.<lambda>  s    Fr<   r%  )
rM   r^   rU   r  r  r}   r  r   rv  r-	  )r.	  r/	  r  r  r(	  rp  r  rr  r  scaledmat_a_is_2dmat_b_is_2dr  r  rI	  r[	  rT	  s   ``  `            r:   _meta_grouped_mm_commonrg	  W  s    
LL	Dgo.> D 8WD%8F KK5...U5;;%BUBU3Ux	

 	KK5>>)KekkU^^.Kt	

 
LL		v7%))+"7q
 ))+"K))+"K	>	> 	z	
 	}	

0 ''w2MMU]]*Mw}}/M V	

	B "-++DJJqMST 	 	Iwq2BCIwq2BCD P	

 ky	
 LL
aV LL

ekk)_
 	DL`	

 
LLD
 
LLT8Y%..8F
 ,E5$	JJr<   c           
      (    t        | |d d ||d |      S )N)r  r  r(	  rp  r  rr  rg	  )r.	  r/	  r(	  rp  rr  s        r:   
grouped_mmrj	    s)     #	 	r<   c	                 *    t        | ||||||||	      S )N)r  r  r(	  rp  r  rr  r  ri	  )	r.	  r/	  r  r  r(	  rp  r  rr  r  s	            r:   meta_scaled_grouped_mmrl	    s,     #!%
 
r<   rJ   half_to_floatc                    |r| j                   t        j                  k(  sJ t        j                  | t        j
                  j                        \  }}|s|n|}t        j                  | |t        j                        }|S )Nr  r  )	rU   rM   rO   rE   r   r   rF   r   r   )rJ   r}   rm	  computation_dtyperI   r  s         r:   softmaxrp	  -  so     ww%**$$$&+&>&>	uDDLL'#| (5<:KL


1L@W@W
XCJr<   c           	      b  	
 t        j                  t              dz  dk(  fd       | j                  t              	t              dz  }	|z
  t        j                  	|k\  	fd       t	        d        }t        |      D ]^  t              dz   dz  z
  
z      
   z   
dz      z   }t        j                  |dk\  
fd       |j                  |       ` t        j                  || j                  | j                  | j                  t        |             S )Nr&  r   c                       dt                S )Nz1Length of pad must be even but instead it equals r  rq  s   r:   r\   z'_constant_pad_nd_meta.<locals>.<lambda>A  s    CCH:Nr<   c                  (    dt               d  dS )Nz`Length of pad should be no more than twice the number of dimensions of the input. Pad length is z while the input has z dimensions.r  )l_inprr  s   r:   r\   z'_constant_pad_nd_meta.<locals>.<lambda>K  s      225c(;P'r<   r   c            	      F    d z       d    ddz       d z    d	S )NzThe input size z, plus negative padding r   r   zG resulted in a negative output size, which is invalid. Check dimension z of your input.r@   )r  r1  l_diffrr  pad_idxs   r:   r\   z'_constant_pad_nd_meta.<locals>.<lambda>V  sE    ok&1*&=%>>V7|nE#gk"2!3 4117!OMr<   )rU   rz   r|   r   )rM   r^   r   r   r   r   r   r   rU   rz   r|   r"   )r  rr  rA  l_padr   new_dimr  r1  rv	  rt	  rw	  s    `    @@@@@r:   _constant_pad_nd_metarz	  ;  s!    
LLC1N
 ++KEHMEU]F	LL	 [&)*I5\c(q1uk*fqj)CL83w{;KKqLM	
 	!  ;;kk||))+E2 r<   r  r  r  c                    | j                         dk(  sJ d       | j                  }|j                  }|j                  dk(  r|d   f}n$|j                  dk(  r|d   |d   f}n
g ||d   }| j                  }| j	                  ||      S )Nr&  z'weight' must be 2-Dr   r   r   )r}   r   r   rU   r   )	rn  r   r  r  r  weight_shapeindices_shaper   rr  s	            r:   	embeddingr~	  e  s     ::<1444<<LMMM||q&21o%7			"1%|A7	5m5\!_5	IIY77r<   max_lengthspadding_valuec                     t        |      dk(  sJ t        |      dk(  sJ |d   j                  d   dz
  }|d   }||g| j                  dd  }| j                  |      S r  )r   r   r   )r   r  r	  r	  r  r  r  s          r:   $meta__jagged_to_padded_dense_forwardr	  }  sv     w<1{q   
aAAAq,6<<+,LL))r<   c                 B    t        |       t               d               }|S )Nc                 8    t        | t        j                        S r  rK   r   r  rS  s    r:   _fz)_create_unary_float_meta_func.<locals>._f  s      =JJ
 	
r<   rA   r(   funcr	  s     r:   _create_unary_float_meta_funcr	    *    4]
  

 Ir<   c                 B    t        |       t               d               }|S )Nc                 :    t        | |t        j                        S r  r	  )rJ   rf  s     r:   r	  z*_create_binary_float_meta_func.<locals>._f  s      q!@!M!M
 	
r<   r	  r	  s     r:   _create_binary_float_meta_funcr	    r	  r<   c                      t                fd       } j                   d}||_         t        t        t        |            |      }|S )Nc                 `     | g|i |}t        | j                  |j                         | S r7   r@  )r   rG   r  r  r9   s       r:   _fnz#_register_inplace_meta.<locals>._fn  s.    '''

CII6r<   rH   )r   rr   rA   getattrr/   )r9   r	  inplace_names   `  r:   _register_inplace_metar	    sO    
2Y 
 kk]!$LCL
4-l3
4S
9CJr<   c                 f    t        j                   j                  j                  k(   fd        g}t        t              rQj
                  dk7  r1t        j                   j                  j                  k(   fd       |j                         t        |dt        j                  iS )Nc                  <    dj                    d j                    S )Nr  z for `end`, but got dtype r   )rs   rt   s   r:   r\   zlerp.<locals>.<lambda>  s    /%++.HTr<   r   c                  <    d j                    dj                    S )Nr  z for `weight`, but got dtype r   )rt   rn  s   r:   r\   zlerp.<locals>.<lambda>  s    /%++6STZT`T`Sabr<   rB   )
rM   r^   rU   rg   r#   r   r   rK   r   rF   )rt   rs   rn  rG   s   ``` r:   lerpr	    s     
LLsyy T 3<D&*%;;!LLv||+b 	F	=EE r<   )rA  c                <    t        | ||t        j                        S r  r  r  tensor1tensor2rA  s       r:   addcmulr	    s!     w0O0W0W r<   c                    t        j                  t        j                  |j                        xr t        j                  |j                         d        t        | ||t        j                        S )Nc                       y)N)zFInteger division with addcdiv is no longer supported, and in a future zErelease addcdiv will perform a true division of tensor1 and tensor2. z4The historic addcdiv behavior can be implemented as zA(input + value * torch.trunc(tensor1 / tensor2)).to(input.dtype) zfor integer inputs and as z6(input + value * tensor1 / tensor2) for float inputs. z?The future addcdiv behavior is just the latter implementation: z4(input + value * tensor1 / tensor2), for all dtypes.r@   r@   r<   r:   r\   zaddcdiv.<locals>.<lambda>  s     	
r<   r  )rM   r^   rE   ri  rU   rK   r   rF   r	  s       r:   addcdivr	    sb     
LL""7==1 6&&w}}5	
		
  w0O0W0W r<   c                     i } dD ]  }t         |   }|D ]  }|| vs||   | |<    ! | j                         D ]  \  }}t        |t        j                  j
                        r,t        |t              sJ  |j                  t        j                  j                  j                        |       t        j                  j                  |j                         d      r|t         d   v st        | d      |j                  r|j                         dv rd|j                         v rt        j!                  ||       	d|j                         v rt"        j!                  ||       3d|j                         v rt$        j!                  ||       ]d	|j                         v rt&        j!                  ||       t(        j!                  ||        y )
N)rw   post_autogradpre_autogradCompositeImplicitAutogradrw   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   aten::cloneaten::copy_aten::rot90aten::_to_copyaten::empty_stridedaten::constant_pad_ndaten::as_strided_scatterzmkldnn::zmkl::zonednn::zquantized::)r   itemsrg   rM   _opsHigherOrderOperatorr   py_impl_CDispatchKeyr1   %_dispatch_has_kernel_for_dispatch_keyr  r[  is_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)activate_meta_tablerq   registryopoop_overloadr9   s         r:   activate_metar	    s    :-d3C--+3C=#C(  : /446R
 k5::#A#AB+z2226EHH00556r:8899 ;
 8@@""m $; ; 
    	 [--//BGGUWXK,,..?DD[RTU{//11BGGUWX+"2"2"44EJJ 8<<["Mm 7r<   )F)Tr  r7   )NNNFr   r   r   r  r  )r
  )r  T)FF)TT)r  )FTN)TFF)TF)r&  )g      ?N)r3   str)r@   r  r%  F)r@   r  FTN)Fr   FNFr   )NF)r   F)g      ?gUUUUUU?FN)NNNNN)r   NNr   )NNF)        FFN)Nr	  FFN)r	  FNN)r	  FN)FN)FNNNN)NNNF)Nr   FNN)NNNN)r   TT)NNr   N)d   r   r   )r   )NNNNF)r   FF)r	  (  r  r   collections.abcr   enumr   	functoolsr   r   typingr   r   r	   r
   typing_extensionsr   rM   torch._prims_commonr  rE   r   r   r   torch._decompr   r   r   r   
torch._opsr   torch._primsr   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   torch._prims_common.wrappersr$   r%   r&   r'   r(   r  r)   r*   torch.fx.experimentalr+   r  torch.utilsr,   r=   r-   r.   opsr/   libraryLibraryr	  r   r  r  r  rA   rK   rW   r`   linspacelogspacer  r   taker^  r  r   r   r   r   r   rQ  r   r   cummaxcumminr   r   r  r   r  r   r  _fft_c2cr$  r'  r(  _fft_r2cr:  randpermgenerator_outr?  r   rD  randintrM  rJ  low_outrP  randrR  _fft_c2rrW  r  rb  ri  
unsqueeze_rm  _sparse_semi_structured_linearr	  rU   ry  _sparse_semi_structured_mmr  _sparse_semi_structured_addmmr  _cslt_sparse_mmr  index_reducer  index_reduce_r  index_selectr  segment_reducer  r   	unary_outr  r}   r  r/  r  r  r  r  r  _assert_asyncr  r   r  _printr  _make_dep_tokenr  r  _functional_sym_constrain_ranger  r  (_functional_sym_constrain_range_for_sizer  _functional_assert_asyncr  r   r  r   r  r	  r  r  _linalg_eighr  r!  _linalg_eigvalslinalg_eigvalsr%  
linalg_eigr(  r,  r.  r4  r9  r=  rC  rV  linalg_inv_exrY  linalg_ldl_factor_exr]   r_  linalg_ldl_solverm  	linalg_lurs  linalg_lu_factor_exrw  linalg_lu_solver  	lu_unpackr  r  	linalg_qrr  r  r  _linalg_svdr  rj  r5  r  r  linalg_solve_triangularr  r  r  _linalg_detr  r  r   r  reflection_pad1dr  replication_pad1dr!  r-  reflection_pad1d_backwardr2  replication_pad1d_backwardr4  rD  reflection_pad2drF  replication_pad2drI  reflection_pad2d_backwardr.  replication_pad2d_backwardrP  r^  reflection_pad3dr`  replication_pad3drc  reflection_pad3d_backwardreplication_pad3d_backwardrj  _pdist_forwardrQ   rn  _pdist_backwardrt  baddbmmr  	bernoullir  
bernoulli_r  rk  r  poissonr  _fused_moving_avg_obs_fq_helperr  mmr  r  r.  r  r  miopen_batch_normr  convolutionr  r	  _has_mkldnnr	  r  _convolution_pointwiser  _linear_pointwiser  has_mklr	  r  _mkl_linearr  r	  r   qconv2d_pointwiseqconv_pointwiser  binaryr  qlinear_pointwiserf  r  binary_tensorr!  linear_dynamic_fp16linear_relu_dynamic_fp16r#  r	  r$  
max_pool2dr/  int4mm_packed_weight_cpur7  r<  
avg_pool2drc  rh  avg_pool2d_backwardrp  
avg_pool3dr  avg_pool3d_backwardr  _adaptive_avg_pool2dr  _adaptive_avg_pool3dr  _adaptive_avg_pool2d_backwardr  _adaptive_avg_pool3d_backwardr  r  adaptive_max_pool2dr  r  r  adaptive_max_pool3dr  r  r  repeat_interleaver  rh   r  r  r  r   _unsafe_indexr  convolution_backwardr	  addbmmr  randint_liker  _fused_adam__fused_adamw_r+  _fused_adamr1  _int_mmr;  _convert_weight_to_int4packrG  #_convert_weight_to_int4pack_for_cpurL  _weight_int4pack_mmrW  _weight_int4pack_mm_for_cpur]  rf  rh  r  _dyn_quant_pack_4bit_weightr  _dyn_quant_matmul_4bitr  _weight_int8pack_mmr  _cdist_forwardr  _cdist_backwardr  _embedding_bagr  _embedding_bag_forward_onlyr  r  nansumr  median	nanmedianr  
dim_valuesr  r   r  logical_not_r  repeatr  zero_r  mul_Scalardiv_logical_and_logical_or_logical_xor_r  add_sub_r  rounddecimalsr  r  
__rshift__r!  
__lshift__r%  zeror'  r  r*  fillr,  relu_r/  	_add_relur1  rrelu_with_noiser6  rrelu_with_noise_functionalr8  rrelu_with_noise_r:  	index_put_unsafe_index_putr>  masked_fill_rB  _masked_scalerE  masked_scatter_rI  masked_scatterrK  masked_scatter_backwardrM  
index_put_rO  aliasrR  r_  bmmrb  rd  rh  rm  rW  rX  r  r  r  r)   max_pool2d_with_indices_backwardr  max_pool2d_with_indicesr  fractional_max_pool2dr  max_pool3d_with_indicesr   max_pool3d_with_indices_backwardr  r  r  r  grid_sampler_2d_backwardr  r  r  r  r  onesr  zerosr  r  r  select_scatterr  slice_scatterr  r   r  r"  gatherr(  r8  r?  rA  rK  rN  scatter_addrS  scatter_add_rV  rM  r_  rA  value_reducerZ  scatter_r\  #_scaled_dot_product_flash_attentionrt  r}  #_scaled_dot_product_cudnn_attentionr  0_scaled_dot_product_fused_attention_overrideabler  ,_scaled_dot_product_flash_attention_backwardr  +_scaled_dot_product_flash_attention_for_cpur  4_scaled_dot_product_flash_attention_for_cpu_backwardr  '_scaled_dot_product_efficient_attentionr  0_scaled_dot_product_efficient_attention_backwardr  ,_scaled_dot_product_cudnn_attention_backwardr  _flash_attention_forwardr  _flash_attention_backwardr  _efficient_attention_forwardr  _efficient_attention_backwardSymIntr  
_scaled_mmr  scatter_reducetwotwo_outr  scatter_reduce_r  multinomialr
  r  r  r"  _upsample_nearest_exact1dr+  _upsample_nearest_exact2dr3  "_upsample_nearest_exact2d_backwardr7  _upsample_nearest_exact3dr   r9  values_stabler<  rS  _thnn_fused_lstm_cellrY  rp  rz  r  r  r  argminr  r  topkr  _segment_reduce_backwardr  kthvaluer  r   r  r  r  r  pixel_shuffler  r  	bucketize
Tensor_outr  histcr  _upsample_bilinear2d_aa_upsample_bicubic2d_aar   _upsample_bilinear2d_aa_backwardr  r  r  r  r  searchsortedr	  r	  embedding_dense_backwardr		  _embedding_bag_backwardr	  _embedding_bag_dense_backwardr	  *_embedding_bag_per_sample_weights_backwardr	  isinr	  	polygammar	  _local_scalar_denser!	  r#	  r%	  r-	  rg	  _grouped_mmrj	  _scaled_grouped_mmrl	  _softmaxrp	  constant_pad_ndrz	  r~	  _jagged_to_padded_dense_forwardr	  r	  r	  special_airy_aispecial_bessel_y0special_bessel_y1special_modified_bessel_i0special_modified_bessel_i1special_modified_bessel_k0special_modified_bessel_k1!special_scaled_modified_bessel_k0!special_scaled_modified_bessel_k1special_chebyshev_polynomial_tspecial_chebyshev_polynomial_uspecial_chebyshev_polynomial_vspecial_chebyshev_polynomial_w&special_shifted_chebyshev_polynomial_t&special_shifted_chebyshev_polynomial_u&special_shifted_chebyshev_polynomial_v&special_shifted_chebyshev_polynomial_wspecial_hermite_polynomial_hspecial_hermite_polynomial_hespecial_laguerre_polynomial_lspecial_legendre_polynomial_pr	  r	  r	  r	  lerp_addcmul_addcdiv_torch._refs.nn.functionaltorch._refs.specialr	  r@   r<   r:   <module>r
     sBH     $  # 5 5 '  # + +  " 
     < 7 ) T]t_yy~~*/--*?*?PV*W ' %a )X
8BF#3"4hr2v6F"FG 
3(* t}}-.
 

==5  /5p 		!!499==12'  3' !!))4+<+<+@+@AB%' %  C%&FRAH tyy  !M "M t%%&I  'I 	[[$++//4;;+>+>P Xy! " !!))4+<+<+@+@ABI  CI3lV $s) 4  %%t}}'8'89:K  ;K $s)  %%t}}'8'89:8
  ;8
v t}}**+"& 3 ,3 t}}$$% **
 &
 $$dll&6&678
 **  9&   $,,"6"678 **  9& 		!!499==12%)$tPT   3 %%t}}'8'89:$Dv $DDI $Dc $DC $D  ;$DN tzz!!" #0( t&&' ( t223
 "%)'+  6
	
 c] $ 4B t../
 (,	
  $	 00 t112 	
'+
  	 $ 3D t##$ ""'+"-<,,-<\\-< 6
-< F	-<
 $-< -< -< -< -< %-<` t  (() 	I
	I		I 	I LL		I
 	I 	I 	I *	I t!!))* 	
			 	 LL		
 	 	 	 +	 t  (()' * ' t""**+
 !% $ $ W
 W W f	 W
 f W f W  W  W  W , WF   $(("4"456  7 txx||    $(("4"456  7 txx||  tzz!!"6 #6 tzz~~( (
 t!!))* + t!!%%& ' t{{""# $ t##++, ) -) t''//0, 1, t33;;< =
 t008896 :6& t<<DDE F
 t,,001 2
F C    F  #  N (,


 !%
$V S C 
 
F 
$ 
 
"  	  	C  !!))4+<+<+H+HIJ]N+ s T  , K" $$,,d.A.A.E.EFGB B6 B  HB  !]N+	6 	 , "	Q QF Q t**+) )F )4 )F )  ,) t""#J JF J4 JF J  $J t}})6 )$ )6 )  ) t$$%)6 )$ )6 )  &) t&&../&  T  0" 	$$,,d.M.M.Q.QR .f .6 .f . .d t!!))*&   + ))1143L3L3P3PQRT8V$ 	
  	
 666!" % S& %%--t/D/D/H/HIJ ''' '
 ' '  K'T &&(:(:;<S#s/3 f  fff>T8U   =4 ((00$2J2J2N2NOPT8V$ 	  	
 666!" % QD $$,,d.B.B.F.FGH 444 4
 4 4 4  I4n t~~S#s 	$$$ $ 	$
 666!"$  $P tTz!2 * &&(:(:;<S#f C ffn8M   =4 $$,,d.B.B.G.GHIV[$1'v '%(F"G ' 2 J'$ t''(   	""" " SM	" )"J.
.
. 49d3i .".
.
. 3-. 66>	.(f V   t$$%
 ##!777 	7
 7 V7 	7 V7 6
7 6666)*7 &7t ,,44d6R6R6V6VWX   	
   
&	  Y2 t$$%S#4( +(
+(+( +( 	+(
 +( 66>+( ) &+(^ t''(
 )
 tzz
 WW	W W 	W
 W W  Wt>#;L t$$%=  &= t%%&>  '>(< t--.\S  /S t../\T  0T2Ej t$$%=  &= t%%&>  '> &&..&&11''//''22	 \& &:<G~ t$$%=  &= t%%&>  '> &&..&&11''//''22	 \$( $(N t""#

f 

 

v 

  $

 t##$Pv PV P Pf PQW P  %P $$dll&6&678/0 '  9': &&(:(:;<&* I  =I
 t$$% & t~~ I !I
 $$dll&6&678"  9" t33;;< * =*. tww	  	B
* 7;i,,iLLi $s)S.!i 49c>"	i
 DIsN#i i i U49c>23iXQ t%%--."$,,"$LL"$ 5<<
 "$ 5<<(	"$
 %,,'"$ "$ !&"$ "$ /"$J t''(),,)LL) ,,) I	)
 #Y) 3i) ) I) ) ))X 	889>9N9N&&:6 599##::BBC D, 599##55==>S ?S
 xx:?--:O:O66;
7 
uyy}}00	1	 
2	
 :?9N9N&&:6 599##55==>599##33;;<" = ?"H 599##55<<= >6 599##55==>599##55<<= > ?, 599##55<<=599##55CCD E >: 599##77??@599##<<DDE	 F A	 =BMM<Q<QVV=9 599&&112 
 3
8 599&&??@@ A@( t&&' M (Mb(<X t''//0E 1EP t UJ   UJp t''(\K(  )K(\ t((001 2" t((001@ 2@ t1199:F ;F, t112\P  3P
	
6 	
S 	
 t''(UI+  )+\ t001\H  2H$ t''(UI'  )'T t001\(  2(
 t%%,,-* .* $$dll&6&678T  9T ##++T-@-@-D-DEF46 @c @  G@ 		&&..		0F0F0J0JKL  M" 

!!4#5#5#<#<=> ?D ))1123H 4H: ##T[[__56./q '  7'0 !!(()*' +' !!))4+=+=+E+EFG  !
 H
2   (()*  !! +!H ~B  B* 0012 3& 889: ; (()*@ +@ 0012< 3< >>?@< A<"3 "3 "3 "jZ 0012D!)&!1D 3D0 ++,-; .;  (()*< +< t""**+ & , &F t##$G  %G* t""**+
 	
`5 ,`5F t//7785 95
 ##T[[__56=$ =  7= ##T^^%;%;<=) >) !!					 Xy! "	 t  (() * t{{""#' $'& tzz!!" # 								!!  !!

 									**Z 

""DJJ$7$789 :
" &&(>(>?@ A &&(>(>?@ A tyy  !& "& 

!!4::#4#456 7 		  $))"2"234" 5" tzz!!" # t~~$$%F   & %%&'RV"  (" 0012RV; 3; &&'(KO ) &&(>(>(F(FGH" I" t  ''( )
 t!!))* + t##$	 %	 t""#6  $6 t++,! -! t&&' ( tzz!!"! #!&R txx 5 !5 txx~~J J6;h #-YYY 	Y 		Y
 	Y 	Y 	Y 	Y 	Y 	Y 	Y Y Y Y Y  !Y" #Y$ %Y& 'Y( )Y* +Y, -Yx;4|383838 38 		38
 	38 	38 	38 	38 	38 	38 	38 	38 38 38 38  !38" #38$ %38& '38lI2X t44<<=( >(V t++334 # 5#L t))112Q 3Qh t++,UI d  -dN t445\b  6bJ%
V %
6 %
Pt  v 3 $ t,,445# 6#$ t##$8  %8" t,,-\;'! ( .!, 		!!"#. $. t&&' ) ()X 		!!499==12   3( 

""DJJNN34   5( t{{E  E@ t""**+. ,. t!!))*. +.
	 	C 	d 	/
  t{{""#' $'6
 
-
b4 t''(& )&
 t  ! "
 !!	 & & ""	 889:
 #!==	= = 	=
 = = E?= ;=@S#X( 889: #!((	( ( 	(
 ( ( ( ( E?( ;(V EEFG
 #'#!''	' ' 	'
 ' ' ' E?' H'T 99( """" 
" 	"
 
" " " " " " " " " " E?"
". 88 "&!	  	
   E?
: AA #'!'"'"'" 
'" 	'"
 
'" '" '" '" '" E?'"
'"T <<=> !))))	)) )) 	))
 )) )) E?)) ?))X ==" !4-4-4- 
4- 	4-
 4- 
4- 4- 4- 4- 4- $Z4- 4- E?4-
4-n 99* "!""" 
" 	"
 
" " " " " " " " " " "  E?!"
"0 %%  "&*'+"&%)HH	H H 	H
 H H H H H H E?H smH  }H H 6"H
HV &&( "&*'+#,,, 
, 	,
 
, , , , , , , , , , E?,  sm!,"  }#,
,4 ))   %!(,!%!%,S,S	,S ,S 6
	,S
 6",S 6",S 3-,S 3-,S ,S ,S ,S E?,S f%,S v,S #,S
,S^ *** "$("'%474747 
47 	47
 6
47 6"47 6"47 ,,47 ,,47 47 47 47 47 47 47  E?!47" SM#47$  %47
47n ''() $(+/'+ _Y
,,_Y
,,_Y \\_Y \\	_Y
 5<<
 _Y 5<<(_Y $_Y _Y *_YD ##'')<)<)D)DEF&  G&
 t##''( )
   (($*:*:*>*>?@	 	  A	,* 	$$d&D&D&L&LM

 	$$d&D&D&L&LM. ((00//77 !% $%U\\ 123 sELL012 uo	
 uo: 	$$d&D&D&L&LM

 									&$N t))112
  3 t&&'4/ (4/n t$$,,-$% .$%N


 ##T[[%8%89:4 ;4 t!!))* + tyy  !
Q "
Q t,,-LP  .  %%t}}';';<=Xy!K " >K  #("9"9 Q t77??@	* A	* t##++,	0 -	0 t!!))* +> t--556F 7FD %%t~~'@'@AB27u   C 

|E  E4 	!!))4+F+F+N+NO & 55==>?  @8 t>>FFG H$ '')<)<=>'  ?'
 uyy~~(() *8 uyy~~  !6 "6$ t  !
 
	-K  "-K` t,,- . t++, '
 -'
T t112  3, t>>?  @. tyy8=e 8  8  t~~6c 6 6F 6  6 t''(I& I )I tyy"v "& "  " t||6& 6V 6  6 P "!+/'+ eKeKeK ell#eK ell#	eK
 6
eK 6
eK 5<<(eK $eK eKP t  "!'+ 6
 6
	
 $   !& ''//01 $(#'+/'+ <<<< \\ \\	
 5<<
  5<<
  5<<( $  20 t}}	v 	C 	 	 	  	 t##$%  %%P t~~ $888 8 	8
 8 8  8, t33;;<
 	**&\* c* 	* =*  d22 3 d44 5 d44 5 d== > d== > d== > d== > dDD E dDD E tBB C tBB C tBB C tBB C tJJ K tJJ K tJJ K tJJ K t@@ A tAA B tAA B tAA B tyy  $ t||./    t||./   , 	tyy)!$,,/!$,,/
    BNJ r<   