
from __future__ import annotations

import json
from io import StringIO
from typing import Any, Dict, Iterator, List, Optional

import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import get_pydantic_field_names
from pydantic import ConfigDict


class Llamafile(LLM):
    """Llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you will need to first:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode:

        `./path/to/model.llamafile --server --nobrowser`

    Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile()
            llm.invoke("Tell me a joke.")
    """

    base_url: str = "http://localhost:8080"
    request_timeout: Optional[int] = None
    streaming: bool = False

    # Generation options forwarded to the llamafile server's /completion
    # endpoint; defaults mirror the server defaults.
    seed: int = -1
    temperature: float = 0.8
    top_k: int = 40
    top_p: float = 0.95
    min_p: float = 0.05
    n_predict: int = -1
    n_keep: int = 0
    tfs_z: float = 1.0
    typical_p: float = 1.0
    repeat_penalty: float = 1.1
    repeat_last_n: int = 64
    penalize_nl: bool = True
    presence_penalty: float = 0.0
    frequency_penalty: float = 0.0
    mirostat: int = 0
    mirostat_tau: float = 5.0
    mirostat_eta: float = 0.1

    model_config = ConfigDict(
        extra="forbid",
    )

    @property
    def _llm_type(self) -> str:
        return "llamafile"

    @property
    def _param_fieldnames(self) -> List[str]:
        # Return the list of field names that will be passed as configurable
        # generation options to the llamafile server. Exclude built-in LLM
        # fields and client-side options the server does not understand.
        ignore_keys = [
            "base_url",
            "cache",
            "callback_manager",
            "callbacks",
            "metadata",
            "name",
            "request_timeout",
            "streaming",
            "tags",
            "verbose",
            "custom_get_token_ids",
        ]
        attrs = [
            k
            for k in get_pydantic_field_names(self.__class__)
            if k not in ignore_keys
        ]
        return attrs

    @property
    def _default_params(self) -> Dict[str, Any]:
        params = {}
        for fieldname in self._param_fieldnames:
            params[fieldname] = getattr(self, fieldname)
        return params

    def _get_parameters(
        self, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> Dict[str, Any]:
        params = self._default_params

        # Only update keys that are already present in the default config.
        # This way, we don't accidentally post unknown/unhandled key/values
        # in the request to the llamafile server.
        for k, v in kwargs.items():
            if k in params:
                params[k] = v

        if stop is not None and len(stop) > 0:
            params["stop"] = stop

        if self.streaming:
            params["stream"] = True

        return params

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Request prompt completion from the llamafile server and return the
        output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.
            run_manager: Optional callback manager for the run.
            **kwargs: Any additional options to pass as part of the
                generation request.

        Returns:
            The string generated by the model.
        """
        if self.streaming:
            # Stream internally and concatenate the chunks into one string.
            with StringIO() as buff:
                for chunk in self._stream(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                ):
                    buff.write(chunk.text)
                text = buff.getvalue()
            return text
        else:
            params = self._get_parameters(stop=stop, **kwargs)
            payload = {"prompt": prompt, **params}

            try:
                response = requests.post(
                    url=f"{self.base_url}/completion",
                    headers={"Content-Type": "application/json"},
                    json=payload,
                    stream=False,
                    timeout=self.request_timeout,
                )
            except requests.exceptions.ConnectionError:
                raise requests.exceptions.ConnectionError(
                    "Could not connect to Llamafile server. Please make sure "
                    f"that a server is running at {self.base_url}."
                )

            response.raise_for_status()
            response.encoding = "utf-8"
            return response.json()["content"]

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yields result objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager for the run.
            **kwargs: Any additional options to pass as part of the
                generation request.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            GenerationChunk objects, each containing a token of generated text.

        Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile(
                temperature=0.0,
            )
            for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
                    stop=["'", "\n"]):
                print(chunk, end="", flush=True)
        """
        params = self._get_parameters(stop=stop, **kwargs)
        if "stream" not in params:
            params["stream"] = True

        payload = {"prompt": prompt, **params}

        try:
            response = requests.post(
                url=f"{self.base_url}/completion",
                headers={"Content-Type": "application/json"},
                json=payload,
                stream=True,
                timeout=self.request_timeout,
            )
        except requests.exceptions.ConnectionError:
            raise requests.exceptions.ConnectionError(
                "Could not connect to Llamafile server. Please make sure "
                f"that a server is running at {self.base_url}."
            )

        response.encoding = "utf8"

        for raw_chunk in response.iter_lines(decode_unicode=True):
            content = self._get_chunk_content(raw_chunk)
            chunk = GenerationChunk(text=content)
            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk

    def _get_chunk_content(self, chunk: str) -> str:
        """When streaming is turned on, the llamafile server returns lines like:

        'data: {"content":" They","multimodal":true,"slot_id":0,"stop":false}'

        Here, we convert this to a dict and return the value of the 'content'
        field.
        """
        if chunk.startswith("data:"):
            # Strip the "data: " prefix and parse the remaining JSON payload.
            cleaned = chunk.lstrip("data: ")
            data = json.loads(cleaned)
            return data["content"]
        else:
            return chunk
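

# Illustrative usage sketch (run this module directly): it assumes a llamafile
# has already been started in server mode and is listening at the default
# base_url (http://localhost:8080); the prompts below are arbitrary examples.
if __name__ == "__main__":
    llm = Llamafile()

    # Blocking call: returns the whole completion as a single string.
    print(llm.invoke("Tell me a joke."))

    # Streaming call: tokens are printed as the server produces them.
    for token in llm.stream("Write a haiku about llamas.", stop=["\n\n"]):
        print(token, end="", flush=True)
    print()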