o
    
sht=                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
Z
G dd deZzd dlmZ d dlmZmZmZ dZW n eyI   d	ZY nw dd
dZ	ddd	dddeeeeee	eeegef f f   fddZeeZe G dd dZdS )    N)Enum)AnyCallableOptionalUnionc                   @   s,   e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
RequestStatusz5Status of a generation request through its lifecycle.pending
prefillingprefilling_splitsplit_pending_remainderdecodingfinishedfailedN)__name__
__module____qualname____doc__PENDING
PREFILLINGPREFILLING_SPLITSPLIT_PENDING_REMAINDERDECODINGFINISHEDFAILED r   r   X/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/transformers/utils/metrics.pyr   
   s    r   )metrics)Status
StatusCode
get_tracerTFc                    s   t sdd S  fdd}|S )a  
    Decorator that attaches a tracer to a class.

    This decorator should be applied to classes that need OpenTelemetry tracing.
    It adds a tracer attribute to the class instance that can be used by the traced decorator.

    Args:
        tracer_name_template: Optional template string for the tracer name.
            If provided, it should contain {module} which will be replaced with the class's full module path
            and {class_name} for the class name.
            If None, a default naming scheme will be used where:
              - If the module already starts with "transformers.", it will use that directly
              - Otherwise, it will prepend "transformers." to the module name

    Returns:
        Class decorator function
    c                 S   s   | S Nr   )clsr   r   r   <lambda>2   s    zattach_tracer.<locals>.<lambda>c                    s*    j t fdd}| _  S )Nc                    sr   | g|R i |  j } j}d u r+|dr"| d| }nd| d| }nj||d}t|| _d S )Ntransformers..)module
class_name)r   r   
startswithformatr   tracer)selfargskwargsmodule_namer&   tracer_name)r!   original_inittracer_name_templater   r   init_with_tracer7   s   
z:attach_tracer.<locals>.decorator.<locals>.init_with_tracer)__init__	functoolswraps)r!   r1   r0   )r!   r/   r   	decorator4   s
   z attach_tracer.<locals>.decorator)_has_opentelemetry)r0   r6   r   r5   r   attach_tracer   s   r8   )	span_name
standaloneadditional_attributesr;   c                   s$    fdd}| du r|S || S )a  
    Decorator to trace function calls with OpenTelemetry.

    Can be used as @traced or @traced(span_name="custom_name")

    Args:
        func: The function to trace
        span_name: Optional custom name for the span (defaults to function name)
        standalone: If True, creates a parentless span
        additional_attributes: Optional list of additional attributes to set on the span.
          Each item is a tuple of (instance_attribute_name, span_attribute_key, value_or_transform_function)
          where:
            - instance_attribute_name: Name of the attribute to get from the class instance
            - span_attribute_key: Key to use when setting the attribute on the span
            - value_or_transform_function: Either a raw value to use directly, or a function to transform
              the attribute value before setting it on the span

    Returns:
        Decorated function with tracing
    c                    s0   t s S dd l}|  fdd}|S )Nr   c                     s,  | rt drjd ur| d nd }|d u}|r!t |dr!|j}ntdj dj }p1j}r7|jn|j}||}|dj |dj |d| | rt	| D ]*\}}	t
|	ttttfsj|	d u rv|d	| t|	 qY|d	| tt|	 qY|r| D ]*\}
}t
|ttttfs|d u r|d
|
 t| q|d
|
 tt| q r|rވ D ]"}|\}}}t ||rt||}t|r||}n|}||| qz| i |}|W W  d    S  ty
 } z|ttj ||  d }~ww 1 sw   Y  d S )N__self__r   r)   r#   r$   zfunction.namezfunction.modulezfunction.is_methodzargs.zkwargs.)hasattrr<   r)   r   r   r   
start_spanstart_as_current_spanset_attribute	enumerate
isinstancestrintfloatbooltypeitemsgetattrcallable	Exception
set_statusr   r   ERRORrecord_exception)r+   r,   instance	is_methodr)   namespan_fnspaniargkeyvalueattr_configinstance_attribute_namespan_attribute_keyvalue_or_transform_functionattribute_valuetransformed_valueresulte)r;   funcr9   r:   r   r   wrapperp   sT   $





 
z*traced.<locals>.decorator.<locals>.wrapper)r7   r3   r4   )r`   r3   ra   r;   r9   r:   )r`   r   r6   j   s   0ztraced.<locals>.decoratorNr   )r`   r9   r:   r;   r6   r   rb   r   tracedN   s   9rc   c                   @   s   e Zd ZdZdefddZdd Zedede	d	d
fddZ
eded	d
fddZedddZededed	d
fddZedede	d	d
fddZd
S )ContinuousBatchProcessorMetricsz0Metrics collection for ContinuousBatchProcessor.max_batch_tokensc                 C   s   || _ |   dS )zInitialize metrics for continuous batch processor.

        Args:
            max_batch_tokens: Maximum number of tokens in a batch
        N)re   _setup_metrics)r*   re   r   r   r   r2      s   z(ContinuousBatchProcessorMetrics.__init__c                 C   s  t s	td dS td| _g d}| jjddd|d| _| jjd	d
dd| _	| jjdddd| _
g d}| jjddd|d| _| jjdddd| _| jjdddd| _| jjdddd| _g d}| jjddd|d| _| jjddd d| _| jjd!d"d d| _dS )#zIInitialize OpenTelemetry metrics and tracing if the library is available.zIOpenTelemetry is not installed. Metrics and tracing will not be recorded.Nz2transformers.generation.continuous_batch_processor)
      2   K   d         i,    i        '  ttft_millisecondsz#Time to first token in millisecondsms)rQ   descriptionunit#explicit_bucket_boundaries_advisoryactive_requests_countz3Number of active requests currently being processedrequests)rQ   ru   rv   waiting_requests_countz*Number of requests waiting to be processed)ri   rk      rn   ro   rp   rq   rr   i N  i0u  i`  request_latency_millisecondsz9End-to-end latency for completed requests in millisecondsdecode_prefill_ratioz3Ratio of decode tokens to prefill tokens in a batchratioprefill_tokens_processedz"Number of prefill tokens processedtokensdecode_tokens_processedz!Number of decode tokens processed)   rg         (   ri   <   F   P   Z   _   b   rk   batch_fill_percentagez5Percentage of max_batch_tokens utilized in each batchpercentkv_cache_free_memory_bytesz/Free memory of the PagedAttentionCache in bytesbyteskv_cache_memory_bytesz0Memory usage of the PagedAttentionCache in bytes)r7   loggerinfor   	get_metermetercreate_histogramttft_histogramcreate_gaugeactive_requests_gaugewaiting_requests_gaugerequest_latency_histogramdecode_prefill_ratio_gaugecreate_counterprefill_tokens_counterdecode_tokens_counterbatch_fill_percentage_histogramkv_cache_free_memory_gaugekv_cache_memory_gauge)r*   ttft_bucketslatency_bucketsbatch_fill_bucketsr   r   r   rf      sx   
z.ContinuousBatchProcessorMetrics._setup_metricscreated_time
request_idreturnNc              
   C   |   t sdS t | d }z| j| td| d|dd W dS  ty= } ztd|  W Y d}~dS d}~ww )zRecord Time to First Token (TTFT).

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        N     @@zRecorded TTFT for request : .2frt   zFailed to record TTFT metric: )r7   timer   recordr   debugrK   warning)r*   r   r   ttft_msr_   r   r   r   record_ttft_metric
  s    z2ContinuousBatchProcessorMetrics.record_ttft_metricrequests_in_batchc           	      C   s&  t r|sdS d}d}|D ]}|jtjkr|d7 }q|jtjtjfv r)|t|j7 }q|| }zI|dkr9| j	| |dkrC| j
	| |dkrQ|| }| j| || j d }| j| td| d| d|dd	| d
| j d W dS  ty } ztd|  W Y d}~dS d}~ww )zRecord metrics about the batch composition including decode/prefill ratio and batch fill percentage.

        Args:
            requests_in_batch: List of request states in the current batch
        Nr      g      Y@zBatch metrics: z decode tokens, z prefill tokens, batch fill: r   z% (/)z Failed to record batch metrics: )r7   statusr   r   r   r   len
prompt_idsr   addr   r   setre   r   r   r   r   rK   r   )	r*   r   decode_tokensprefill_tokensstatetotal_batch_tokensr~   fill_percentager_   r   r   r   record_batch_metrics  sD   

z4ContinuousBatchProcessorMetrics.record_batch_metricsc                 C   s  t sdS zj|jt|j }t|j}|jtjtjfv rdnd}|| |j	 |j
 |j d | }|t|j |j	 |j
 |j d | }| j| | j| td|d dd| d|j d	||j d
 dd	 W dS  ty } ztd|  W Y d}~dS d}~ww )a&  Record memory usage of the PagedAttentionCache without GPU synchronization.

        This calculates the theoretical memory usage based on cache configuration
        and the number of blocks currently in use.

        Args:
            cache: The PagedAttentionCache object to measure
        N      zKV Cache memory: i   r   zMB, Used blocks: r   z (rk   z.1fz%)z*Failed to record KV cache memory metrics: )r7   
num_blocksr   _free_blocks	key_cachedtypetorchfloat16bfloat16
block_sizenum_key_value_headshead_dimr   r   r   r   r   rK   r   )r*   cachenum_used_blocks
num_layersbytes_per_parametermemory_bytesfree_memory_bytesr_   r   r   r   record_kv_cache_memory_metricsF  sb   



z>ContinuousBatchProcessorMetrics.record_kv_cache_memory_metricsactive_requestswaiting_requestsc              
   C   sv   t sdS z| j| | j| td| d| d W dS  ty: } ztd|  W Y d}~dS d}~ww )zRecord metrics about active and waiting requests.

        Args:
            active_requests: Number of active requests
            waiting_requests: Number of waiting requests
        NzQueue metrics: z active requests, z waiting requestsz Failed to record queue metrics: )r7   r   r   r   r   r   rK   r   )r*   r   r   r_   r   r   r   record_queue_metrics|  s   z4ContinuousBatchProcessorMetrics.record_queue_metricsc              
   C   r   )zRecord metrics about a completed request.

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        Nr   z Recorded request completion for r   r   rt   z,Failed to record request completion metric: )r7   r   r   r   r   r   rK   r   )r*   r   r   
latency_msr_   r   r   r   record_request_completion  s    z9ContinuousBatchProcessorMetrics.record_request_completion)r   N)r   r   r   r   rD   r2   rf   rc   rE   rC   r   listr   r   r   r   r   r   r   r   rd      s    
Q(5rd   r    )r3   loggingr   enumr   typingr   r   r   r   r   r   opentelemetryr   opentelemetry.tracer   r   r   r7   ImportErrorr8   r   tuplerC   rc   	getLoggerr   r   rd   r   r   r   r   <module>   s4    
0&

Z