
    dh`                     v    S SK JrJrJrJrJrJr  S SKJr  S SK	J
r
  S SKJr  S SKJr  S SKJr   " S S\
5      rg	)
    )AnyCallableDictIteratorListOptional)CallbackManagerForLLMRun)LLM)GenerationChunk)pre_init)Fieldc                      \ rS rSr% SrSr\\S'   \\S'   Sr	\\S'   Sr
\\S'   Sr\\S'   Sr\\S	'   Sr\\S
'   \r\\S'   \" / 5      r\\   \S'    \" S5      r\\S'    \" S5      r\\S'    \" S5      r\\S'    \" S5      r\\\      \S'    \S\\\4   S\\\4   4S j5       r\S\4S j5       rS\S\4S jr   S S\S\\\      S\\!   S\S\4
S jjr"  S S\S\\\      S\\!   S\S\#\$   4
S jjr%Sr&g)!	ExLlamaV2
   a  ExllamaV2 API.

- working only with GPTQ models for now.
- Lora models are not supported yet.

To use, you should have the exllamav2 library installed, and provide the
path to the Llama model as a named parameter to the constructor.
Check out:

Example:
    .. code-block:: python

        from langchain_community.llms import Exllamav2

        llm = Exllamav2(model_path="/path/to/llama/model")

#TODO:
- Add loras support
- Add support for custom settings
- Add support for custom stop sequences
Nclient
model_pathexllama_cacheconfig	generator	tokenizersettingslogfuncstop_sequences   max_new_tokensT	streamingverbosedisallowed_tokensvaluesreturnc                 ~    SS K nUR                  R                  5       (       d  [	        S5      e SSKJnJnJnJ	n  SSK
JnJn	  US   n
U
(       d  S US	'   US	   nUS
   (       a  US
   nU" UR                  5        O[        S5      eU" 5       nUS   Ul        UR!                  5         U" U5      nU" USS9nUR#                  U5        U" U5      nUS   (       a
  U	" XU5      nO	U" XU5      nUS    Vs/ sH!  nUR%                  5       R'                  5       PM#     snUS'   [)        USUS   5        U" SUS    35        UR+                  S5      nU(       a  UR-                  UU5        XS'   UUS'   XS'   UUS'   XS'   U$ ! [         a  n[        S5      UeS nAff = f! [         a    [        S5      ef = fs  snf )Nr   z@Unable to import torch, please install with `pip install torch`.z/CUDA is not available. ExllamaV2 requires CUDA.)r   ExLlamaV2CacheExLlamaV2ConfigExLlamaV2Tokenizer)ExLlamaV2BaseGeneratorExLlamaV2StreamingGeneratorzCould not import exllamav2 library. Please install the exllamav2 library with (cuda 12.1 is required)example : !python -m pip install https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whlr   c                      g )N )argskwargss     Z/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/llms/exllamav2.py<lambda>0ExLlamaV2.validate_environment.<locals>.<lambda>_   s        r   r   z<settings is required. Custom settings are not supported yet.r   T)lazyr   r   zstop_sequences r   r   r   r   r   r   )torchImportErrorcudais_availableEnvironmentError	exllamav2r   r"   r#   r$   exllamav2.generatorr%   r&   __dict__NotImplementedError	model_dirprepareload_autosplitstriplowersetattrgetdisallow_tokens)clsr   r0   er   r"   r#   r$   r%   r&   r   r   r   r   modelr   r   r   x
disalloweds                       r+   validate_environmentExLlamaV2.validate_environment>   s   	 zz&&(("#TUU	  # <F9#*j)HH%%&%N  !"!,/&!&u48]+&v.	+3E)TI.uYOI @FFV?W#X?W!AGGIOO$5?W#X *F3C,DE/&)9":!;<=ZZ 34
$$Y
; x'{!x'{"/M  	R	$  	^ 	L $Ys(   F F! 6'F:
FFF!F7c                     g)zReturn type of llm.r   r(   )selfs    r+   	_llm_typeExLlamaV2._llm_type   s     r.   textc                 L    U R                   R                  R                  U5      $ )z-Get the number of tokens present in the text.)r   r   
num_tokens)rI   rL   s     r+   get_num_tokensExLlamaV2.get_num_tokens   s    ~~''22488r.   promptstoprun_managerr*   c                     U R                   nU R                  (       a)  SnU R                  XX4S9 H  nU[        U5      -  nM     U$ UR	                  UU R
                  U R                  S9nU[        U5      S  nU$ )N )rQ   rR   rS   r*   )rQ   gen_settingsrN   )r   r   _streamstrgenerate_simpler   r   len)	rI   rQ   rR   rS   r*   r   combined_text_outputchunkoutputs	            r+   _callExLlamaV2._call   s     NN	>>#% k &  %E
2$ ('..!]].. / F CKM*FMr.   c              +     #    U R                   R                  U5      nU R                  R                  5         U R                  R	                  / 5        U R                  R                  XPR                  5        Sn U R                  R                  5       u  pxn	US-  nU(       a  UR                  UU R                  S9  Uv   U(       d  X`R                  :X  a   g Ma  7f)Nr      )tokenr   )r   encoder   warmupset_stop_conditionsbegin_streamr   streamon_llm_new_tokenr   r   )
rI   rQ   rR   rS   r*   	input_idsgenerated_tokensr\   eos_s
             r+   rW   ExLlamaV2._stream   s      NN))&1	**2.##I}}= NN113ME!,, LL -  K&*=*== s   CCr(   )NN)'__name__
__module____qualname____firstlineno____doc__r   r   __annotations__rX   r   r   r   r   r   printr   r   r   r   r   r   intr   boolr   r   r   r   r   rF   propertyrJ   rO   r	   r^   r   r   rW   __static_attributes__r(   r.   r+   r   r   
   s   , FCOM3FCIsIs Hc GX %b	NDI)=*NC$/DkIt!8$KGT- .34[xS	*87I$sCx. IT#s(^ I IV 3  93 93 9 %):>	 tCy! 67	
  
: %):>	 tCy! 67	
  
/	" r.   r   N)typingr   r   r   r   r   r   langchain_core.callbacksr	   langchain_core.language_modelsr
   langchain_core.outputsr   langchain_core.utilsr   pydanticr   r   r(   r.   r+   <module>r      s(    @ @ = . 2 ) ~ ~r.   