
    dh!                         S SK rS SKrS SKrS SKJrJrJrJrJ	r	  S SK
Jr  S SKJr  S SKJr  S SKJr  \R$                  " \5      rSS.S\S	\S
\S\	\\      S\S\4S jjrS\S\S\4S jr " S S\5      rg)    N)AnyCallableListMappingOptional)CallbackManagerForLLMRun)LLM)
ConfigDictenforce_stop_tokens)stoppipelinepromptargsr   kwargsreturnc                :    U " U/UQ70 UD6nUb  [        XR5      nU$ )zInference function to send to the remote hardware.

Accepts a pipeline callable (or, more likely,
a key pointing to the model on the cluster's object store)
and returns text predictions for each document
in the batch.
r   )r   r   r   r   r   texts         \/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/llms/self_hosted.py_generate_textr      s-     F,T,V,D"4.K    devicec                 "   [        U [        5      (       a+  [        U S5       n[        R                  " U5      n SSS5        [
        R                  R                  S5      b  SSKnUR                  R                  5       nUS:  d  X:  a  [        SU SU S35      eUS:  a  US:  a  [        R                  S	U5        UR                  U5      U l        U R                  R!                  U R                  5      U l        U $ ! , (       d  f       N= f)
z+Send a pipeline to a device on the cluster.rbNtorchr   zGot device==z', device is required to be within [-1, )zDevice has %d GPUs available. Provide device={deviceId} to `from_model_id` to use availableGPUs for execution. deviceId is -1 for CPU and can be a positive integer associated with CUDA device id.)
isinstancestropenpickleload	importlibutil	find_specr   cudadevice_count
ValueErrorloggerwarningr   modelto)r   r   fr   cuda_device_counts        r   _send_pipeline_to_devicer/   #   s    (C  (D!Q {{1~H "
 ~~(4!JJ335B;66vh '88I7J!M  A:+a/NNL "  ,,v.!**8??;O3 "!s   D  
Dc                   |  ^  \ rS rSr% SrSr\\S'   Sr\\S'   \	r
\\S'    Sr\\S'    \\S'    Sr\\   \S	'    S
S/r\\   \S'    Sr\\S'    \" SS9rS\4U 4S jjr\  SS\S\S\\\      S\S\S\4S jj5       r\S\\\4   4S j5       r\S\4S j5       r  SS\S\\\      S\\    S\S\4
S jjr!Sr"U =r#$ ) SelfHostedPipelineB   a  Model inference on self-hosted remote hardware.

Supported hardware includes auto-launched instances on AWS, GCP, Azure,
and Lambda, as well as servers specified
by IP address and SSH credentials (such as on-prem, or another
cloud like Paperspace, Coreweave, etc.).

To use, you should have the ``runhouse`` python package installed.

Example for custom pipeline and inference functions:
    .. code-block:: python

        from langchain_community.llms import SelfHostedPipeline
        from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
        import runhouse as rh

        def load_pipeline():
            tokenizer = AutoTokenizer.from_pretrained("gpt2")
            model = AutoModelForCausalLM.from_pretrained("gpt2")
            return pipeline(
                "text-generation", model=model, tokenizer=tokenizer,
                max_new_tokens=10
            )
        def inference_fn(pipeline, prompt, stop = None):
            return pipeline(prompt)[0]["generated_text"]

        gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
        llm = SelfHostedPipeline(
            model_load_fn=load_pipeline,
            hardware=gpu,
            model_reqs=model_reqs, inference_fn=inference_fn
        )
Example for <2GB model (can be serialized and sent directly to the server):
    .. code-block:: python

        from langchain_community.llms import SelfHostedPipeline
        import runhouse as rh
        gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
        my_model = ...
        llm = SelfHostedPipeline.from_pipeline(
            pipeline=my_model,
            hardware=gpu,
            model_reqs=["./", "torch", "transformers"],
        )
Example passing model path for larger models:
    .. code-block:: python

        from langchain_community.llms import SelfHostedPipeline
        import runhouse as rh
        import pickle
        from transformers import pipeline

        generator = pipeline(model="gpt2")
        rh.blob(pickle.dumps(generator), path="models/pipeline.pkl"
            ).save().to(gpu, path="models")
        llm = SelfHostedPipeline.from_pipeline(
            pipeline="models/pipeline.pkl",
            hardware=gpu,
            model_reqs=["./", "torch", "transformers"],
        )
Npipeline_refclientinference_fnhardwaremodel_load_fnload_fn_kwargsz./r   
model_reqsFallow_dangerous_deserializationforbid)extrar   c                   > UR                  S5      (       d  [        S5      e[        TU ]  " S0 UD6   SSKnUR                  U R                  S9R                  U R                  U R                  S9nU R                  =(       d    0 nUR                  " S0 UD6U l        UR                  U R                  S9R                  U R                  U R                  S9U l        g! [
         a    [        S5      ef = f)	zInit the pipeline with an auxiliary function.

The load function must be in global scope to be imported
and run on the server, i.e. in a module and not a REPL or closure.
Then, initialize the remote inference function.
r:   aQ  SelfHostedPipeline relies on the pickle module. You will need to set allow_dangerous_deserialization=True if you want to opt-in to allow deserialization of data using pickle.Data can be compromised by a malicious actor if not handled properly to include a malicious payload that when deserialized with pickle can execute arbitrary code. r   NzXCould not import runhouse python package. Please install it with `pip install runhouse`.)fn)reqs )getr(   super__init__runhouseImportErrorfunctionr7   r,   r6   r9   r8   remoter3   r5   r4   )selfr   rhremote_load_fn_load_fn_kwargs	__class__s        r   rC   SelfHostedPipeline.__init__   s     zz;<<6  	"6"	! (:(:;>>MM ? 
 --3*11DODkkT%6%6k7::MM ; 
  	A 	s   C   C6r   r   r   c           	          [        U[        5      (       d  [        R                  S5        XS.nU " SU[        USS/U=(       d    / -   S.UD6$ )z=Init the SelfHostedPipeline from a pipeline object or string.zSerializing pipeline to send to remote hardware. Note, it can be quite slowto serialize and send large models with each execution. Consider sending the pipelineto the cluster and passing the path to the pipeline instead.)r   r   transformersr   )r8   r7   r6   r9   r@   )r   r   r)   r*   r/   )clsr   r6   r9   r   r   r8   s          r   from_pipeline SelfHostedPipeline.from_pipeline   sd     (C((NNO '/A 
)2&0J4D"E	

 
 	
r   c                 "    0 SU R                   0E$ )zGet the identifying parameters.r6   )r6   rH   s    r   _identifying_params&SelfHostedPipeline._identifying_params   s    
4==)
 	
r   c                     g)Nself_hosted_llmr@   rT   s    r   	_llm_typeSelfHostedPipeline._llm_type   s     r   r   r   run_managerc                 @    U R                   " SU R                  XS.UD6$ )N)r   r   r   r@   r4   r3   )rH   r   r   r[   r   s        r   _callSelfHostedPipeline._call   s-     {{ 
&&v
DJ
 	
r   r]   )Nr   )NN)$__name__
__module____qualname____firstlineno____doc__r3   r   __annotations__r4   r   r5   r   r6   r8   r   dictr9   r   r   r:   boolr
   model_configrC   classmethodintr	   rQ   propertyr   rU   rY   r   r^   __static_attributes____classcell__)rL   s   @r   r1   r1   B   s   <| L#FC+L(+<Hc<<%)NHTN)?!7OJS	+E,1#T1 L#
 #
J 
 +/

 
 T#Y'	

 
 
 

 
6 
WS#X%6 
 
 !3 ! ! %):>		
	
 tCy!	
 67		

 	
 
	
 	
r   r1   )importlib.utilr#   loggingr!   typingr   r   r   r   r   langchain_core.callbacksr   #langchain_core.language_models.llmsr	   pydanticr
   langchain_community.llms.utilsr   	getLoggerr`   r)   r   r   rj   r/   r1   r@   r   r   <module>rv      s       9 9 = 3  >			8	$ !%	  49
	
  	(s C C >j
 j
r   