
from __future__ import annotations

import json
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncIterator,
    Dict,
    Generator,
    Iterator,
    List,
    Mapping,
    Optional,
    Union,
)

import aiohttp
import requests
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk

if TYPE_CHECKING:
    from xinference.client import RESTfulChatModelHandle, RESTfulGenerateModelHandle
    from xinference.model.llm.core import LlamaCppGenerateConfig


class Xinference(LLM):
    """`Xinference` large-scale model inference service.

    To use, you should have the xinference library installed:

    .. code-block:: bash

       pip install "xinference[all]"

    If you're simply using the services provided by Xinference, you can use the
    lightweight xinference_client package instead:

    .. code-block:: bash

        pip install xinference_client

    Check out: https://github.com/xorbitsai/inference
    To run, you need to start a Xinference supervisor on one server and
    Xinference workers on the other servers.

    Example:
        To start a local instance of Xinference, run

        .. code-block:: bash

           $ xinference

        You can also deploy Xinference in a distributed cluster. Here are the steps:

        Starting the supervisor:

        .. code-block:: bash

           $ xinference-supervisor

        Starting the worker:

        .. code-block:: bash

           $ xinference-worker

    Then, launch a model using the command line interface (CLI).

    Example:

    .. code-block:: bash

       $ xinference launch -n orca -s 3 -q q4_0

    It will return a model UID. Then, you can use Xinference with LangChain.

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace with the model UID returned when launching the model
        )

        llm.invoke(
            prompt="Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024, "stream": True},
        )

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference
        from langchain.prompts import PromptTemplate

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace with the model UID returned when launching the model
            stream=True,
        )
        prompt = PromptTemplate(
            input_variables=["country"],
            template="Q: where can we visit in the capital of {country}? A:",
        )
        chain = prompt | llm
        chain.stream(input={"country": "France"})
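
    Example:
        Token-level streaming is also available through the standard LangChain
        ``stream`` interface. A minimal sketch, reusing the ``llm`` object from
        the example above:

        .. code-block:: python

            for chunk in llm.stream(
                "Q: where can we visit in the capital of France? A:"
            ):
                print(chunk, end="", flush=True)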


    To view all the supported builtin models, run:

    .. code-block:: bash

        $ xinference list --all

    """  # noqa: E501
NzOptional[Any]clientOptional[str]
server_url	model_uidzDict[str, Any]model_kwargsc                  >  SSK Jn  U=(       d    0 n[        TU ]  " S
0 UUUS.D6  U R                  c  [        S5      eU R                  c  [        S5      e0 U l	        SU l
        U R                  5         Ub#  U R                  (       a  SU 3U R                  S	'   U" X5      U l        g ! [         a(     SSKJn   N! [         a  n[        S5      UeS nAff = ff = f)Nr   )RESTfulClientzCould not import RESTfulClient from xinference. Please install it with `pip install xinference` or `pip install xinference_client`.r   r   r   zPlease provide server URLzPlease provide the model UIDFzBearer Authorization )xinference.clientr   ImportErrorxinference_clientsuper__init__r   
ValueErrorr   _headers_cluster_authed_check_cluster_authenticatedr   )selfr   r   api_keyr   r   e	__class__s          [/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/llms/xinference.pyr&   Xinference.__init__   s    		7 $)r 	
(& ,	
 ??"899>>!;<<(*$))+4#7#7/6wi-@DMM/*#J8?  	; !Y 	s)   B* *
C5B==
CCCCc                    g)zReturn type of llm.
xinferencer!   r+   s    r/   	_llm_typeXinference._llm_type   s         c                Z    0 SU R                   0ESU R                  0ESU R                  0E$ )zGet the identifying parameters.r   r   r   r   r3   s    r/   _identifying_paramsXinference._identifying_params   sC    
T__-
DNN+
 t001
 	
r6   c                "   U R                    S3n[        R                  " U5      nUR                  S:X  a  SU l        g UR                  S:w  a  [        SUR                  5       S    35      eUR                  5       n[        US   5      U l        g )Nz/v1/cluster/auth  F   z+Failed to get cluster information, detail: detailauth)r   requestsgetstatus_coder)   RuntimeErrorjsonbool)r+   urlresponseresponse_datas       r/   r*   'Xinference._check_cluster_authenticated   s    !!12<<$3&#(D ##s*"'}}x89;  %MMOM#'f(=#>D r6   c                   U R                   c  [        S5      eU R                   R                  U R                  5      nUR	                  S0 5      n0 U R
                  EUEnU(       a  X&S'   U(       a7  UR	                  S5      (       a!  SnU R                  UUUUS9 H  nXx-  nM	     U$ UR                  XS9n	U	S   S	   S
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the xinference model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Returns:
            The generated string by the model.
        """
        if self.client is None:
            raise ValueError("Client is not initialized!")

        model = self.client.get_model(self.model_uid)
        generate_config: "LlamaCppGenerateConfig" = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}

        if stop:
            generate_config["stop"] = stop

        if generate_config and generate_config.get("stream"):
            combined_text_output = ""
            for token in self._stream_generate(
                model=model,
                prompt=prompt,
                run_manager=run_manager,
                generate_config=generate_config,
            ):
                combined_text_output += token
            return combined_text_output

        completion = model.generate(prompt=prompt, generate_config=generate_config)
        return completion["choices"][0]["text"]

    def _stream_generate(
        self,
        model: Union["RESTfulGenerateModelHandle", "RESTfulChatModelHandle"],
        prompt: str,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        generate_config: Optional["LlamaCppGenerateConfig"] = None,
    ) -> Generator[str, None, None]:
        """
        Args:
            prompt: The prompt to use for generation.
            model: The model used for generation.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Yields:
            A string token.
        """
        streaming_response = model.generate(
            prompt=prompt, generate_config=generate_config
        )
        for chunk in streaming_response:
            if isinstance(chunk, dict):
                choices = chunk.get("choices", [])
                if choices:
                    choice = choices[0]
                    if isinstance(choice, dict):
                        token = choice.get("text", "")
                        log_probs = choice.get("logprobs")
                        if run_manager:
                            run_manager.on_llm_new_token(
                                token=token, verbose=self.verbose, log_probs=log_probs
                            )
                        yield token

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        generate_config = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop
        for stream_resp in self._create_generate_stream(prompt, generate_config):
            if stream_resp:
                chunk = self._stream_response_to_generation_chunk(stream_resp)
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    def _create_generate_stream(
        self,
        prompt: str,
        generate_config: Optional[Dict[str, List[str]]] = None,
    ) -> Iterator[str]:
        if self.client is None:
            raise ValueError("Client is not initialized!")
        model = self.client.get_model(self.model_uid)
        yield from model.generate(prompt=prompt, generate_config=generate_config)

    @staticmethod
    def _stream_response_to_generation_chunk(
        stream_response: str,
    ) -> GenerationChunk:
        """Convert a stream response to a generation chunk."""
        token = ""
        if isinstance(stream_response, dict):
            choices = stream_response.get("choices", [])
            if choices:
                choice = choices[0]
                if isinstance(choice, dict):
                    token = choice.get("text", "")
                    return GenerationChunk(
                        text=token,
                        generation_info=dict(
                            finish_reason=choice.get("finish_reason", None),
                            logprobs=choice.get("logprobs", None),
                        ),
                    )
                raise TypeError("choice type error!")
            return GenerationChunk(text=token)
        raise TypeError("stream_response type error!")

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        generate_config = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop
        async for stream_resp in self._acreate_generate_stream(
            prompt, generate_config
        ):
            if stream_resp:
                chunk = self._stream_response_to_generation_chunk(stream_resp)
                if run_manager:
                    await run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    async def _acreate_generate_stream(
        self, prompt: str, generate_config: Optional[Dict[str, List[str]]] = None
    ) -> AsyncIterator[str]:
        request_body: Dict[str, Any] = {"model": self.model_uid, "prompt": prompt}
        if generate_config is not None:
            for key, value in generate_config.items():
                request_body[key] = value

        stream = bool(generate_config and generate_config.get("stream"))
        async with aiohttp.ClientSession() as session:
            async with session.post(
                url=f"{self.server_url}/v1/completions",
                json=request_body,
                # Forward any auth headers collected in __init__.
                headers=self._headers,
            ) as response:
                if response.status != 200:
                    if response.status == 404:
                        raise FileNotFoundError(
                            "astream call failed with status code 404."
                        )
                    optional_detail = await response.text()
                    raise ValueError(
                        f"astream call failed with status code {response.status}."
                        f" Details: {optional_detail}"
                    )

                async for line in response.content:
                    if not stream:
                        # A non-streaming response arrives as a single JSON body.
                        yield json.loads(line)
                    else:
                        # A streaming response uses server-sent events: each
                        # line is a "data:" prefix followed by a JSON payload.
                        json_str = line.decode("utf-8")
                        if json_str.startswith("data:"):
                            json_str = json_str[len("data:") :].strip()
                            if json_str:
                                yield json.loads(json_str)
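
# A minimal async usage sketch (assumptions: a Xinference server is reachable at
# the given URL and a model has already been launched; "<model_uid>" is a
# placeholder for the UID returned by `xinference launch`):
#
#     import asyncio
#
#     async def demo() -> None:
#         llm = Xinference(
#             server_url="http://127.0.0.1:9997", model_uid="<model_uid>"
#         )
#         async for chunk in llm.astream(
#             "Q: where can we visit in the capital of France? A:"
#         ):
#             print(chunk, end="", flush=True)
#
#     asyncio.run(demo())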