
import importlib.util
from typing import Any, List, Mapping, Optional

from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from pydantic import ConfigDict

from langchain_community.llms.utils import enforce_stop_tokens

DEFAULT_MODEL_ID = "google/flan-t5-large"
DEFAULT_TASK = "text2text-generation"
VALID_TASKS = ("text2text-generation", "text-generation", "summarization")


class WeightOnlyQuantPipeline(LLM):
    """Weight only quantized model.

    To use, you should have the `intel-extension-for-transformers` package and
    the `transformers` package installed.
    intel-extension-for-transformers:
        https://github.com/intel/intel-extension-for-transformers

    Example using from_model_id:
        .. code-block:: python

            from langchain_community.llms import WeightOnlyQuantPipeline
            from intel_extension_for_transformers.transformers import (
                WeightOnlyQuantConfig
            )
            config = WeightOnlyQuantConfig()
            hf = WeightOnlyQuantPipeline.from_model_id(
                model_id="google/flan-t5-large",
                task="text2text-generation",
                pipeline_kwargs={"max_new_tokens": 10},
                quantization_config=config,
            )
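
    `WeightOnlyQuantConfig` is instantiated above with its defaults; it can
    also take an explicit weight dtype (a sketch; the supported
    `weight_dtype` values depend on your intel-extension-for-transformers
    version):
        .. code-block:: python

            config = WeightOnlyQuantConfig(weight_dtype="int4")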
    Example passing pipeline in directly:
        .. code-block:: python

            from langchain_community.llms import WeightOnlyQuantPipeline
            from intel_extension_for_transformers.transformers import (
                AutoModelForSeq2SeqLM,
                WeightOnlyQuantConfig,
            )
            from transformers import AutoTokenizer, pipeline

            model_id = "google/flan-t5-large"
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            config = WeightOnlyQuantConfig()
            model = AutoModelForSeq2SeqLM.from_pretrained(
                model_id,
                quantization_config=config,
            )
            pipe = pipeline(
                "text2text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=10,
            )
            hf = WeightOnlyQuantPipeline(pipeline=pipe)
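
    Example composing with a prompt template (a minimal sketch; assumes the
    `hf` instance above and the runnable composition API from
    `langchain_core`):
        .. code-block:: python

            from langchain_core.prompts import PromptTemplate

            prompt = PromptTemplate.from_template("Summarize: {text}")
            chain = prompt | hf
            chain.invoke({"text": "Quantization stores weights in low-bit form."})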
Npipelinemodel_idmodel_kwargspipeline_kwargsallow)extrataskdevice
device_mapload_in_4bitload_in_8bitquantization_configkwargsreturnc
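        # `device` is an integer index: >= 0 selects an Intel XPU through
        # IPEX, while a negative value means "unset". It is mutually
        # exclusive with `device_map`, which names a target such as "cpu".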
           	      2   Ub&  [        U[        5      (       a  US:  a  [        S5      e[        R                  R                  S5      c  [        S5      e SSKJnJn  SSK	J
n  SS	KJn  SS
KJn  [        U[        5      (       a,  US:  a&  U" 5       (       d  [        S5      eS[        U5      -   nO[        U[        5      (       a  US:  a  SnUc  Uc  SnU=(       d    0 nUR                   " U40 UD6n US:X  a  UR                   " U4UUU	SUS.UD6nO6US;   a  UR                   " U4UUU	SUS.UD6nO[        SU S["         S35      eSU;   a-  UR%                  5        VVs0 sH  u  nnUS:w  d  M  UU_M     nnnU=(       d    0 nU" SUUUUUS.UD6nUR&                  ["        ;  a   [        SUR&                   S["         S35      eU " SUUUUS.U
D6$ ! [         a    [        S5      ef = f! [         a  n[        SU S35      UeSnAff = fs  snnf )z5Construct the pipeline object from model_id and task.Nz7`Device` and `device_map` cannot be set simultaneously!torchz;Weight only quantization pipeline only support PyTorch now!r   )AutoModelForCausalLMAutoModelForSeq2SeqLM)is_ipex_available)AutoTokenizer)r   zCould not import transformers python package. Please install it with `pip install transformers` and `pip install intel-extension-for-transformers`.z)Don't find out Intel GPU on this machine!zxpu:cpur   F)r   r   r   use_llm_runtimer   )r   r   Got invalid task , currently only  are supportedzCould not load the z# model due to missing dependencies.trust_remote_code)r   model	tokenizerr   r   )r   r   r   r    )
isinstanceint
ValueError	importlibutil	find_spec-intel_extension_for_transformers.transformersr"   r#   ,intel_extension_for_transformers.utils.utilsr$   transformersr%   r   ImportErrorstrfrom_pretrainedVALID_TASKSitemsr   )clsr   r   r   r   r   r   r   r   r   r   r"   r#   r$   r%   hf_pipeline_model_kwargsr-   r,   ekv_pipeline_kwargsr   s                           i/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/llms/weight_only_quantization.pyfrom_model_id%WeightOnlyQuantPipeline.from_model_idO   s    !z&#'>'>6B;VWW>>##G,4M 	 W2< fc""v{$&& !LMM#f+-J$$!F>!"
$*!11(LmL		((,<<!-!-(;$)) $ BB-==!-!-(;$)) $ !'v .&&1].B  -/!.!4!4!6!6A!?R:R1!6   +0b 
&
 
 ==+#HMM? 3""-n>   
&,	

 
 	
K  	F 	\  	%dV+NO	s<   G 5G1 G1 5G1 $H4HG.1
H;HHc                 J    U R                   U R                  U R                  S.$ )zGet the identifying parameters.r   r   r   rH   selfs    rD   _identifying_params+WeightOnlyQuantPipeline._identifying_params   s'      --#33
 	
    c                     g)zReturn type of llm.weight_only_quantizationr.   rI   s    rD   	_llm_type!WeightOnlyQuantPipeline._llm_type   s     *rM   promptstoprun_managerc                    U R                  U5      nU R                   R                  S:X  a  US   S   [        U5      S nOpU R                   R                  S:X  a	  US   S   nOMU R                   R                  S:X  a	  US   S   nO*[        SU R                   R                   S	[         S
35      eU(       a  [        Xb5      nU$ )a  Call the HuggingFace model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                from langchain_community.llms import WeightOnlyQuantPipeline
                llm = WeightOnlyQuantPipeline.from_model_id(
                    model_id="google/flan-t5-large",
                    task="text2text-generation",
                )
                llm.invoke("This is a prompt.")
        """
        response = self.pipeline(prompt)
        if self.pipeline.task == "text-generation":
            # "text-generation" echoes the prompt at the start of
            # "generated_text", so slice it off.
            text = response[0]["generated_text"][len(prompt) :]
        elif self.pipeline.task == "text2text-generation":
            text = response[0]["generated_text"]
        elif self.pipeline.task == "summarization":
            text = response[0]["summary_text"]
        else:
            raise ValueError(
                f"Got invalid task {self.pipeline.task}, "
                f"currently only {VALID_TASKS} are supported"
            )
        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        return text