
    hh!                        d dl Z d dlmZmZ d dlmZ d dlmZ d dlZddl	m
Z
 ddlmZ  e
j                  d      Zed	eej"                  eeef   fd
       Z G d de      Ze G d d             Ze G d d             Zy)    N)	dataclassfield)Enum)Optional   )logging)tracedContinuousBatchingLoggerreturnc                  @   t         j                  j                         rt        j                  d      } t         j                  j	                          t         j                  j                          t         j                  j                  |       j                  }t         j                  j                  |       }t         j                  j                  |       }nt         j                  j                  j                         rt         j                  j                  j                         rWt        j                  d      } t         j                  j                         }|t         j                  j                         z
  }d}nt        j                  d      } d }d}d}| |||fS )Ncudampsr   cpu)torchr   is_availabledeviceempty_cachesynchronizeget_device_propertiestotal_memorymemory_reservedmemory_allocatedbackendsr   is_builtdriver_allocated_memoryrecommended_max_memory)r   r   reserved_memoryallocated_memorys       s/var/www/html/aiagenthome/venv/lib/python3.12/site-packages/transformers/generation/continuous_batching/requests.pyget_device_and_memory_breakdownr       s   zz f%

 

 zz77?LL**44V< ::66v>				(	(	*u~~/A/A/J/J/Le$yy88:'%))*J*J*LLe$<2BBB    c                   ,    e Zd ZdZdZdZdZdZdZdZ	dZ
y	)
RequestStatusz5Status of a generation request through its lifecycle.pending
prefillingprefilling_splitsplit_pending_remainderdecodingfinishedfailedN)__name__
__module____qualname____doc__PENDING
PREFILLINGPREFILLING_SPLITSPLIT_PENDING_REMAINDERDECODINGFINISHEDFAILED r!   r   r#   r#   6   s*    ?GJ)7HHFr!   r#   c                       e Zd ZU dZeed<    ee      Zee	   ed<    ee      Z
ee	   ed<    ee      Zee   ed<   dZee   ed<   ej                   Zeed	<    eej$                        Zeed
<   y)GenerationOutputa5  Tracks the output of a generation request.

    Attributes:
        request_id (str): The ID of the generation request.
        prompt_ids (list[int]): The IDs of the prompt tokens.
        generated_tokens (list[int]): The generated tokens.
        logprobs (list[float]): The log probabilities of the generated tokens.
        error (Optional[str]): Any error message associated with the request. When None, the request was successful.
        status (RequestStatus): The status of the request.
        created_time (float): The time the request was created.
    
request_iddefault_factory
prompt_idsgenerated_tokenslogprobsNerrorstatuscreated_time)r+   r,   r-   r.   str__annotations__r   listr<   intr=   r>   floatr?   r   r#   r/   r@   timerA   r6   r!   r   r8   r8   B   sy    
 O!$7JS	7"'"=d3i=!$7Hd5k7E8C=)11FM1		:L%:r!   r8   c                      e Zd ZU dZeed<   dZeee	      ed<   dZ
eee	      ed<    ee      Zee	   ed<    ee      Zee	   ed<   d	Ze	ed
<   d	Ze	ed<   ej"                  Zeed<   dZe	ed<   dZe	ed<    eej*                        Zeed<   dZee   ed<   dZeeef   ed<   edefd       Zej:                  defd       Zd Zde	fdZde	fdZ e!de	de"fd       Z#d Z$d Z%y) RequestStateay  Tracks the state of a generation request through its lifecycle.

    Attributes:
        request_id (str): The ID of the generation request.
        full_prompt_ids (list[int] | None): The tokens IDs of the full prompt.
        prompt_ids (list[int] | None): The tokens IDs currently being processed.
        remaining_prompt_ids (list[int]): The tokens IDs remaining to be processed (for split requests).
        static_outputs (list[int]): The generated tokens.
        allocated_blocks (int): The number of blocks allocated to the request.
        position_offset (int): The current position in the sequence for position_ids.
        status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                                SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED
        max_new_tokens (int): The maximum number of new tokens to generate.
        eos_token_id (int): The ID of the end-of-sequence token.
        created_time (float): The time the request was created.
        error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
    r9   Nfull_prompt_idsr<   r:   remaining_prompt_idsstatic_outputsr   allocated_blocksposition_offset_status   max_new_tokenseos_token_idrA   r?   )rR   rR   lifespanr   c                     | j                   S )N)rO   selfs    r   r@   zRequestState.status|   s    ||r!   valuec                 (   | j                   t        j                  k(  r#t        j                         df| _        || _         y |t        j
                  k(  r8| j                  d   t        j                         f| _        | j                          || _         y )NrR   r   )rO   r#   r/   rG   rT   r4   log_end_of_request)rW   rX   s     r   r@   zRequestState.status   sm    <<=000!YY["-DM  m,,,!]]1-tyy{;DM##%r!   c                    t        | j                        }| j                         }| j                  d   | j                  z
  }| j                  d   | j                  z
  }t
        j                  d| j                   d|d|d|d|
       y )Nr      zRequest z finished: prefill_len = z decode_len = z start_time = z end_time = )lenrJ   generated_lenrT   rA   loggerinfor9   )rW   prefill_len
decode_len
start_timeend_times        r   rZ   zRequestState.log_end_of_request   s    $../'')
]]1%(9(99
==#d&7&77t''A;2B/J?RaT^Sbbodlcpq	
r!   c                     | j                   S )zCGet the current length of the sequence (prompt + generated tokens).)rN   rV   s    r   current_lenzRequestState.current_len   s    ###r!   c                 ,    t        | j                        S )z*Get the number of tokens generated so far.)r]   rL   rV   s    r   r^   zRequestState.generated_len   s    4&&''r!   token_idc                 .   | j                   t        j                  k7  ry|| j                  k(  xr | j                  dk7  }| j	                         | j
                  k\  }|r|r| j                  j                  |g       |s|rt        j                  | _         yy)zUpdate the request with a newly generated token and check for completion.

        Args:
            token_id: The token ID to add to the output sequence

        Returns:
            bool: True if the request is now complete, False otherwise
        FrR   T)	r@   r#   r3   rS   r^   rQ   rL   extendr4   )rW   rh   is_eos
is_max_lens       r   update_with_tokenzRequestState.update_with_token   s     ;;-000T...J43D3D3J'')T-@-@@
 6&&z2Z'00DKr!   c           
      n   d| j                    d| j                   d| j                          dt        | j                         dt        | j
                         d| j                   dt        | j                         d| j                   d	| j                   g	}d
dj                  |      z   dz   S )Nzrequest_id=zstatus=zout_tokens=zquery_length=zremaining_tokens=z
kv_length=zfull_prompt_length=zallocated_blocks=zgenerated_tokens=zRequestState(
	z,
	z
))r9   rO   r^   r]   r<   rK   rN   rJ   rM   rL   join)rW   msgs     r   __repr__zRequestState.__repr__   s    $//*+dll^$$,,./0C012D$=$= >?@--./!#d&:&:";!<= 5 567 3 345

 #W\\#%66>>r!   c                     t        | j                  | j                  | j                  | j                  g | j
                        S )z7Convert the request state to a GenerationOutput object.)r9   r<   r@   r=   r>   r?   )r8   r9   rJ   r@   rL   r?   rV   s    r   to_generation_outputz!RequestState.to_generation_output   s9    ++;;!00**
 	
r!   )&r+   r,   r-   r.   rB   rC   rJ   r   rD   rE   r<   r   rK   rL   rM   rN   r#   r/   rO   rQ   rS   rG   rA   rF   r?   rT   tuplepropertyr@   setterrZ   rf   r^   r	   boolrm   rq   rs   r6   r!   r   rI   rI   Y   sI   & O+/OXd3i(/&*Jc#*&+D&A$s)A %d ;NDI;cOS*22G]2NCL#		:L%:E8C=$,HeE5L!,   ]]M  
$S $(s (
 # $  4?	
r!   rI   )rG   dataclassesr   r   enumr   typingr   r   utils.loggingr   utils.metricsr	   	getLoggerr_   staticmethodrt   r   rE   r    r#   r8   rI   r6   r!   r   <module>r      s     (    $ # 
		5	6 Cu||S#s/J)K C C,	D 	 ; ; ;, s
 s
 s
r!   