
    hKT                         d dl mZmZmZmZ d dlZd dlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ  ej@                  e!      Z" G d de      Z#y)    )AnyCallableOptionalUnionN   )(DiaClassifierFreeGuidanceLogitsProcessor"DiaEOSChannelFilterLogitsProcessor!DiaEOSDelayPatternLogitsProcessorLogitsProcessorListTemperatureLogitsWarper)StoppingCriteriaList)BaseStreamer)GenerateOutputGenerationConfigGenerationMixinGenerationMode)is_deepspeed_zero3_enabled)is_fsdp_managed_module)PreTrainedModel)loggingc                   X    e Zd ZdZ	 	 	 	 	 	 	 	 d%dedee   deej                     dee	eej                  gee   f      dee   dee   deeeef      d	eej                     d
eej                     def fdZ	 d&dee   dee   dedeeef   f fdZ	 	 	 d'deej                     deej                     deeeej                  f      deej                  ee   eeej                  f   f   f fdZ	 d&dededeeej                  f   dej                  deej*                     deej                  eeej                  f   f   fdZ	 	 d( fd	Zedej                  dedeej                     dej                  fd       Z	 	 	 	 	 	 	 	 	 	 	 	 d)deej                     dee   dee   dee   dee	eej                  gee   f      dee   ded   d ed!   d	eej                     d
eej                     dee   d"ee   fd#Z ej8                         	 	 	 	 	 	 	 	 	 	 	 	 d)deej                     dee   dee   dee   dee	eej                  gee   f      dee   ded   d ed!   d	eej                     d
eej                     dee   d"ee   deeej                  f   fd$       Z xZ S )*DiaGenerationMixinNgeneration_configinput_ids_seq_lengthencoder_input_idsprefix_allowed_tokens_fnlogits_processordevicemodel_kwargsnegative_prompt_idsnegative_prompt_attention_maskreturnc
                    |j                   }
|j                  }d |_         d |_        t               }||dk7  r|j                  t	        |             |j                  t        t        | j                  j                        | j                  j                               t        | -  |||d |||||		      }|
.|
dk7  r)t        |
|j                        }|j                  d|       |j                  t        | j                  j                  | j                  j                  |j                   |             |
|_         ||_        |S )Ng      ?)num_channelseos_token_id	r   r   r   r   r   r   r   r    r!      )guidance_scaleguidance_top_kr   )delay_patternr%   max_generation_lenr   )r(   temperaturer   appendr   r	   lenconfigr*   r%   super_get_logits_processorr   top_kinsertr
   
max_length)selfr   r   r   r   r   r   r   r    r!   original_guidance_scaleoriginal_temperaturecustom_processorsmerged_processorscfg_processor	__class__s                  e/var/www/html/aiagenthome/venv/lib/python3.12/site-packages/transformers/models/dia/generation_dia.pyr1   z(DiaGenerationMixin._get_logits_processor,   sS    #4"B"B0<<+/((,% 01+0D0K$$%<=Q%RS  . !:!:;![[55	
 "G9/!5/%).% 3+I : 

 #.3Ja3OD6066M $$Q6  -"kk77![[55#4#?#?		
 ,C((<%      use_model_defaultskwargsc                     t        |   ||fi |\  }}|xj                  t        | j                  j
                        z  c_        |j                  d uxr |j                  dk7  | _        ||fS Nr'   )r0   _prepare_generation_configr4   maxr/   r*   r(   	_uses_cfg)r5   r   r>   r?   r   r;   s        r<   rB   z-DiaGenerationMixin._prepare_generation_configo   s{     +0'*L1+
5;+
'< 	$$DKK,E,E(FF$ +99EoJ[JjJjnoJo ,..r=   inputsbos_token_idc                    t         |   |||      \  }}}| j                  rXt        j                  |      }t        j
                  ||gd      }|j                  dd       |d   j                  dd      |d<   |||fS )N)rE   rF   r   r   dimattention_mask   r'   )r0   _prepare_model_inputsrD   torch
zeros_likecatgetrepeat)r5   rE   rF   r   
input_nameunconditioned_inputsr;   s         r<   rL   z(DiaGenerationMixin._prepare_model_inputs   s     ,17+H%% ,I ,
(
L >>#(#3#3F#; YY(<=1EF 0$7C1=>N1O1V1VWXZ[1\-.z<//r=   
batch_sizemodel_input_namedecoder_start_token_idc                 8   dx}}|d|v r|j                  d      }|d|v r|j                  d      }||t        j                  d|du d|du d       | j                  j                  j
                  }| j                  r|dz  n|}	|*t        j                  |	d|f|t        j                  |	      }t        j                  |	|j                  d   ft        j                  |
      }|j                         }
|j                  d   |dddddf   | j                  j                  k(  j                  d      j                         z
  }|
ddd|f   j                  dd      j                         }|ddd|f   j                         }||d<   |
|d<   ||fS )zGPrepares `decoder_input_ids` for generation with encoder-decoder modelsNdecoder_input_idsdecoder_attention_maskz[In order to generate with Dia, we need the processed audio input: Got `decoder_input_ids`: z" and got `decoder_attention_mask`=z]. This can be achieved via the [`DiaProcessor`] but now defaulting to non-delayed generation.rK   r'   )dtyper   )sizerZ   r   r   rH   decoder_delay_mask)poploggerwarning_oncer/   decoder_configr$   rD   rM   fulllongonesshapepad_token_idsumrC   	transpose)r5   rT   rU   r   rV   r   rX   rY   r$   real_batch_size
delay_maskvalid_input_sizes               r<   )_prepare_decoder_input_ids_for_generationz<DiaGenerationMixin._prepare_decoder_input_ids_for_generation   s    6:92#(;|(K , 0 01D E#(@L(P%1%5%56N%O" $(>(F%T122TUkswUwTx yop  ;;55BBL15jAoZO ($)JJ$a68NV[V`V`io%! &+ZZ%'8'>'>q'AB%**]c&"
 '++-
##A&*;Aq!G*DH`H`*`)e)ejl)e)m)q)q)ss 	 'q*;+;*;';<FFq!LQQS!7;L<L;L8L!M!R!R!T 2H-.-7)* ,..r=   c                    | j                   r|d   j                  d   dz  n|d   j                  d   }|j                  || j                  j                  j
                  d      j                  dd      }t        	|    |fd|i|}| j                  || j                  j                  |      |d<   |j                  dd      r'|d	   d   dkD  r|d   d d dd d f   d d d d d f   |d<   |d   j                         |d<   | j                   rRd
D ]M  }|j                  |d       t        dgdg||   j                  dz
  z  z         } ||   j                  | ||<   O |S )Nr   rK   r\   r'   encoder_outputsrX   	use_cacheFcache_position)rX   rY   decoder_position_ids)rD   re   reshaper/   ra   r$   rh   r0   prepare_inputs_for_generationapply_delay_maskrf   rP   
contiguoustuplendimrQ   )
r5   	input_idsrn   r]   r?   rT   model_inputskeyrepeat_patternr;   s
            r<   rs   z0DiaGenerationMixin.prepare_inputs_for_generation   s    :>_Q'--a0A5_]^M_MeMefgMh
%%j$++2L2L2Y2Y[]^hhijlmn	 w<YrXgrkqr -1,A,At{{//1C-
()
 K/LAQ4RST4UXY4Y0<=P0QRSUWYZRZ0[\]_cef\f0gL,- -99L,M,X,X,Z() >>^##C.:%*A3!S8I8N8NQR8R1S+S%TN(@S(9(@(@.(QL%	 _ r=   rx   pad_idrj   c                     || S t        | j                  d   |j                  d         }|d d d |d d f   }| d d d |d d f   }t        j                  ||k(  ||      | d d d |d d f<   | S rA   )minre   rM   where)rx   r|   rj   mask_len
valid_maskvalid_inputs         r<   rt   z#DiaGenerationMixin.apply_delay_mask   s    yq):+;+;A+>?9H9a0
9H9a0 &+[[v1E{T^%_	!YhY/"r=   stopping_criteriasynced_gpusassistant_modelr   streamerr   custom_generatec                 
   | j                  |||||      } | j                  ||fi |\  }}|j                  |      }| j                  |j	                                | j                  |||       |0t               xs t        |       xr t        j                         dkD  }||n	t               }||n	t               }|j                  dd       d u}| j                  ||j                  |      \  }}}|j                  d   }|j                   }| j#                  |||       d|vr| j%                  ||||      }| j'                  ||||j(                  |j                         \  }}|j*                  r!| j-                  ||j                  d            }||j/                  |j1                                |j                  d   }|j                  d	      d u xr |j2                  d u}|j                  d
      d u xr |j4                  d u}| j7                  ||||||      }| j9                         r	d|vrd|d<   | j;                  |||       |j2                  dz
  }|j                  d   |k7  r-|dk(  r(| j<                  j>                  s||j                  d   z  }| jA                  |||||       | jC                  ||||||j                   ||	|
	      }| jE                  |||j                  d            }|jF                  |d<   |jI                  d|j                  d         }|tJ        jL                  tJ        jN                  fv r3|jP                  dkD  rtS        d       | jT                  |f|||d||S tS        d      )Nr'   rJ   r   )r   rn   )rT   rU   r   rV   r   	tokenizerr\   r4   
min_length)r   has_default_max_lengthhas_default_min_lengthrU   inputs_tensorinput_ids_lengthlogits_to_keepinputs_embedsr&   )r   r   r   ro   z2`num_return_sequences>1` is incompatible with Dia.)r   r   r   zGot incompatible mode for generation, should be one of greedy or sampling. Ensure that beam search is de-activated by setting `num_beams=1`.)+_extract_generation_mode_kwargsrB   get_generation_mode_validate_model_kwargscopy_validate_generation_moder   r   distget_world_sizer   r   rP   rL   rF   re   r   _prepare_special_tokens._prepare_encoder_decoder_kwargs_for_generationrl   _decoder_start_token_tensortoken_healingheal_tokensputcpur4   r   _prepare_generated_length_supports_logits_to_keep_validate_generated_lengthr/   is_encoder_decoder_prepare_cache_for_generationr1   _get_stopping_criteriaro   rr   r   SAMPLEGREEDY_SEARCHnum_return_sequences
ValueError_sample)r5   rE   r   r   r   r   r   r   r   r    r!   r>   r   r?   generation_mode_kwargsr   generation_modekwargs_has_attention_maskr   rU   rT   r   rx   r   r   r   max_cache_lengthprepared_logits_processorprepared_stopping_criterias                                r<   _main_generate_loopz&DiaGenerationMixin._main_generate_loop   s4   $ "&!E!E"
 +J$*I*I1+
5;+
'< ,??P##L$5$5$78&&8IKab 57W;QRV;Wv]a]p]p]ruv]vK/?/K+QdQf1B1N-ThTj %1$4$45Et$LTX$X!8<8R8R%22L9
5' #((+
%%$$%68QZ`$a L0NN|-=?PL
 #'"P"P!-%#4#P#P '' #Q #
	< **((4J4N4N{4[\ILL) %??2.!'L!9T!A!nFWFbFbjnFn!'L!9T!A!nFWFbFbjnFn ::/#9#9-'- ; 
 ((*/?|/S-.L)*''(9;KMcd -77!;"&66 O3KK22 3 3A 66**|_jJZ	

 %)$>$>/!1+%=- ''% 3+I %? 
%
! &*%@%@//,00= &A &
" %6$?$?[! %%b)//"*=>	 ~44n6R6RSS 559 !UVV  4<<!:"<"3	
 )   T r=   c                    |j                  d      }||j                         } | j                  d|||||||||	|
||d|}t        |t        j
                         }|r|j                  }n|}| j                  j                  j                  }|j                  d   |z  }|j                  ||d      j                  dd      }| j                  || j                  j                  |      }|r	||_        |S |}|S )NrX   )rE   r   r   r   r   r   r   r   r    r!   r>   r   r   r\   r'   rK    )rP   cloner   
isinstancerM   Tensor	sequencesr/   ra   r$   re   rr   rh   rt   rf   )r5   rE   r   r   r   r   r   r   r   r    r!   r>   r   r?   rj   outputreturn_dict_in_generateoutput_sequencesr$   bszs                       r<   generatezDiaGenerationMixin.generate  s(   $ ZZ 34
!#))+J))) 
/-/%=#+ 3+I1+
 
  '1&F"F"%//% {{11>>$$Q'<7+33CrJTTUVXYZ  001A4;;C[C[]gh"/F  &Fr=   )NNNNNNNN)N)NNN)NN)NNNNNNNNNNNN)!__name__
__module____qualname__rD   r   r   intrM   
LongTensorr   r   listr   strdictr   r1   boolrv   rB   rL   r   rl   rs   staticmethodrt   r   r   no_gradr   r   r   __classcell__)r;   s   @r<   r   r   (   s   I
 /38<W[:> $156:AEA!+A! 'smA! $E$4$45	A!
 #+8S%,,4Gc4R+S"TA! ##67A! A! tCH~.A! &ell3A! )1(>A! 
A!H cg/!)*:!;/QYZ^Q_/ru/	%	&/$ *./3:>	0&0 u||,0 tC$567	0
 
u||Xc]Dell1B,CC	D08 *.0/0/ 0/ 3,-	0/
 !&0/ &0/ 
uc5<<&7!88	90/j 	#J ELL # 8TYT`T`Ka fkfrfr   *.8<:><@W[&*7;-16:AE-1)-Y&Y $$45Y ##67	Y
 $$89Y #+8S%,,4Gc4R+S"TY d^Y ""34Y >*Y &ell3Y )1(>Y %TNY "#Yv U]]_ *.8<:><@W[&*7;-16:AE-1)-9&9 $$459 ##67	9
 $$899 #+8S%,,4Gc4R+S"T9 d^9 ""349 >*9 &ell39 )1(>9 %TN9 "#9 
~u///	09 9r=   r   )$typingr   r   r   r   rM   torch.distributeddistributedr   generation.logits_processr   r	   r
   r   r   generation.stopping_criteriar   generation.streamersr   generation.utilsr   r   r   r   integrations.deepspeedr   integrations.fsdpr   modeling_utilsr   utilsr   
get_loggerr   r_   r   r   r=   r<   <module>r      sZ     2 1     A 0 a a @ 7 -  
		H	%g gr=   