
from enum import Enum
from typing import Any, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from pydantic import BaseModel, ConfigDict

from langchain_community.llms.utils import enforce_stop_tokens


class Device(str, Enum):
    """The device to use for inference, cuda or cpu"""

    cuda = "cuda"
    cpu = "cpu"


class ReaderConfig(BaseModel):
    """Configuration for the reader to be deployed in Titan Takeoff API."""

    model_config = ConfigDict(
        protected_namespaces=(),
    )

    model_name: str
    device: Device = Device.cuda
    consumer_group: str = "primary"
    tensor_parallel: Optional[int] = None
    max_seq_length: int = 512
    max_batch_size: int = 4


class TitanTakeoff(LLM):
    """Titan Takeoff API LLMs.

Titan Takeoff is a wrapper to interface with Takeoff Inference API for
generative text to text language models.

You can use this wrapper to send requests to a generative language model
and to deploy readers with Takeoff.

Examples:
    This is an example of how to deploy a generative language model and send
    requests.

    .. code-block:: python
        # Import the TitanTakeoff class from community package
        import time
        from langchain_community.llms import TitanTakeoff

        # Specify the generative model reader you'd like to deploy
        reader_1 = {
            "model_name": "TheBloke/Llama-2-7b-Chat-AWQ",
            "device": "cuda",
            "tensor_parallel": 1,
            "consumer_group": "llama"
        }

        # For every reader you pass into models arg Takeoff will spin
        # up a reader according to the specs you provide. If you don't
        # specify the arg no models are spun up and it assumes you have
        # already done this separately.
        llm = TitanTakeoff(models=[reader_1])

        # Wait for the reader to be deployed, time needed depends on the
        # model size and your internet speed
        time.sleep(60)

        # Prints the text generated for the prompt by the `llama` consumer group,
        # where we just spun up the Llama 7B model
        print(llm.invoke(
            "Where can I see football?", consumer_group="llama"
        ))

        # You can also send generation parameters to the model, any of the
        # following can be passed in as kwargs:
        # https://docs.titanml.co/docs/next/apis/Takeoff%20inference_REST_API/generate#request
        # for instance:
        print(llm.invoke(
            "Where can I see football?", consumer_group="llama", max_new_tokens=100
        ))
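
    You can also stream tokens as they are generated (an illustrative
    sketch, not part of the original example; it reuses the `llm` instance
    and the `llama` reader defined above):

    .. code-block:: python

        for token in llm.stream(
            "Where can I see football?", consumer_group="llama"
        ):
            print(token)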
zhttp://localhostbase_urli  porti  	mgmt_portF	streamingNclientmodelsc                    > [         TU ]  XX4S9   SSKJn  U" U R
                  U R                  U R                  S9U l        U H  nU R                  R                  U5        M      g! [         a    [	        S5      ef = f)a  Initialize the Titan Takeoff language wrapper.

Args:
    base_url (str, optional): The base URL where the Takeoff
        Inference Server is listening. Defaults to `http://localhost`.
    port (int, optional): What port is Takeoff Inference API
        listening on. Defaults to 3000.
    mgmt_port (int, optional): What port is Takeoff Management API
        listening on. Defaults to 3001.
    streaming (bool, optional): Whether you want to by default use the
        generate_stream endpoint over generate to stream responses.
        Defaults to False. In reality, this is not significantly different
        as the streamed response is buffered and returned similar to the
        non-streamed response, but the run manager is applied per token
        generated.
    models (List[ReaderConfig], optional): Any readers you'd like to
        spin up on. Defaults to [].
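        For illustration (an assumed example, not in the original
        docstring), a single entry might look like::

            ReaderConfig(
                model_name="TheBloke/Llama-2-7b-Chat-AWQ",
                device="cuda",
                consumer_group="llama",
            )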

Raises:
    ImportError: If you haven't installed takeoff-client, you will
    get an ImportError. To remedy run `pip install 'takeoff-client==0.4.0'`
        """
        super().__init__(
            base_url=base_url, port=port, mgmt_port=mgmt_port, streaming=streaming
        )
        try:
            from takeoff_client import TakeoffClient
        except ImportError:
            raise ImportError(
                "takeoff-client is required for TitanTakeoff. "
                "Please install it with `pip install 'takeoff-client>=0.4.0'`."
            )
        self.client = TakeoffClient(
            self.base_url, port=self.port, mgmt_port=self.mgmt_port
        )
        for model in models:
            self.client.create_reader(model)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "titan_takeoff"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to Titan Takeoff (Pro) generate endpoint.

Args:
    prompt: The prompt to pass into the model.
    stop: Optional list of stop words to use when generating.
    run_manager: Optional callback manager to use when streaming.

Returns:
    The string generated by the model.

Example:
    .. code-block:: python

        model = TitanTakeoff()

        prompt = "What is the capital of the United Kingdom?"

        # Use of model(prompt), ie `__call__` was deprecated in LangChain 0.1.7,
        # use model.invoke(prompt) instead.
        response = model.invoke(prompt)
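
        # Generation parameters (see the class docstring for the full list)
        # can also be passed as keyword arguments, for example:
        response = model.invoke(prompt, max_new_tokens=100)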

        """
        if self.streaming:
            text_output = ""
            for chunk in self._stream(
                prompt=prompt,
                stop=stop,
                run_manager=run_manager,
            ):
                text_output += chunk.text
            return text_output

        response = self.client.generate(prompt, **kwargs)
        text = response["text"]

        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Call out to Titan Takeoff (Pro) stream endpoint.

Args:
    prompt: The prompt to pass into the model.
    stop: Optional list of stop words to use when generating.
    run_manager: Optional callback manager to use when streaming.

Yields:
    A dictionary like object containing a string token.

Example:
    .. code-block:: python

        model = TitanTakeoff()

        prompt = "What is the capital of the United Kingdom?"
        response = model.stream(prompt)
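
        # The result is an iterator of tokens; for example (illustrative):
        for token in response:
            print(token)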

        # OR

        model = TitanTakeoff(streaming=True)

        response = model.invoke(prompt)

        """
        response = self.client.generate_stream(prompt, **kwargs)
        buffer = ""
        for text in response:
            buffer += text.data
            if "data:" in buffer:
                # Remove the first instance of "data:" from the buffer.
                if buffer.startswith("data:"):
                    buffer = ""
                if len(buffer.split("data:", 1)) == 2:
                    content, _ = buffer.split("data:", 1)
                    buffer = content.rstrip("\n")
                # Only emit a chunk once there is content to process.
                if buffer:
                    chunk = GenerationChunk(text=buffer)
                    buffer = ""  # Reset buffer for the next set of data.
                    if run_manager:
                        run_manager.on_llm_new_token(token=chunk.text)
                    yield chunk

        # Yield any remaining content in the buffer, stripping the end token.
        if buffer:
            chunk = GenerationChunk(text=buffer.replace("</s>", ""))
            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk