
from __future__ import annotations

import logging
import warnings
from collections.abc import Iterable, Mapping, Sequence
from typing import Any, Literal, Optional, Union, cast

import openai
import tiktoken
from langchain_core.embeddings import Embeddings
from langchain_core.runnables.config import run_in_executor
from langchain_core.utils import (
    from_env,
    get_pydantic_field_names,
    secret_from_env,
)
from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
from typing_extensions import Self

logger = logging.getLogger(__name__)


def _process_batched_chunked_embeddings(
    num_texts: int,
    tokens: list[Union[list[int], str]],
    batched_embeddings: list[list[float]],
    indices: list[int],
    skip_empty: bool,
) -> list[Optional[list[float]]]:
    # For each text, the embeddings of its chunks.
    results: list[list[list[float]]] = [[] for _ in range(num_texts)]

    # For each text, the token (or character) length of each chunk, used as the
    # weight when averaging the chunk embeddings back into a single vector.
    num_tokens_in_batch: list[list[int]] = [[] for _ in range(num_texts)]

    for i in range(len(indices)):
        if skip_empty and len(batched_embeddings[i]) == 1:
            continue
        results[indices[i]].append(batched_embeddings[i])
        num_tokens_in_batch[indices[i]].append(len(tokens[i]))

    # For each text, the final embedding.
    embeddings: list[Optional[list[float]]] = []
    for i in range(num_texts):
        _result = results[i]
        if len(_result) == 0:
            # Filled in later with the embedding of an empty string.
            embeddings.append(None)
        elif len(_result) == 1:
            embeddings.append(_result[0])
        else:
            # Length-weighted average of the chunk embeddings, re-normalized to
            # unit length.
            total_weight = sum(num_tokens_in_batch[i])
            average = [
                sum(
                    val * weight
                    for val, weight in zip(embedding, num_tokens_in_batch[i])
                )
                / total_weight
                for embedding in zip(*_result)
            ]
            magnitude = sum(val**2 for val in average) ** 0.5
            embeddings.append([val / magnitude for val in average])

    return embeddings


class OpenAIEmbeddings(BaseModel, Embeddings):
    """OpenAI embedding model integration.

Setup:
    Install ``langchain_openai`` and set environment variable ``OPENAI_API_KEY``.

    .. code-block:: bash

        pip install -U langchain_openai
        export OPENAI_API_KEY="your-api-key"

Key init args — embedding params:
    model: str
        Name of OpenAI model to use.
    dimensions: Optional[int] = None
        The number of dimensions the resulting output embeddings should have.
        Only supported in ``'text-embedding-3'`` and later models.

Key init args — client params:
    api_key: Optional[SecretStr] = None
        OpenAI API key.
    organization: Optional[str] = None
        OpenAI organization ID. If not passed in will be read
        from env var ``OPENAI_ORG_ID``.
    max_retries: int = 2
        Maximum number of retries to make when generating.
    request_timeout: Optional[Union[float, tuple[float, float], Any]] = None
        Timeout for requests to the OpenAI embedding API.

See full list of supported init args and their descriptions in the params section.

Instantiate:
    .. code-block:: python

        from langchain_openai import OpenAIEmbeddings

        embed = OpenAIEmbeddings(
            model="text-embedding-3-large"
            # With the `text-embedding-3` class
            # of models, you can specify the size
            # of the embeddings you want returned.
            # dimensions=1024
        )
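
    The client parameters listed above (``api_key``, ``organization``, ``timeout``,
    ``max_retries``) can also be passed explicitly instead of being read from
    environment variables; the values below are illustrative placeholders:

    .. code-block:: python

        embed = OpenAIEmbeddings(
            model="text-embedding-3-large",
            api_key="sk-...",  # placeholder; normally read from OPENAI_API_KEY
            timeout=30,
            max_retries=2,
        )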

Embed single text:
    .. code-block:: python

        input_text = "The meaning of life is 42"
        vector = embed.embed_query(input_text)
        print(vector[:3])

    .. code-block:: python

        [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]

Embed multiple texts:
    .. code-block:: python

        vectors = embed.embed_documents(["hello", "goodbye"])
        # Showing only the first 3 coordinates
        print(len(vectors))
        print(vectors[0][:3])

    .. code-block:: python

        2
        [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]
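
    Lists longer than ``chunk_size`` texts (1000 by default) are split across
    multiple API requests; the batch size can be overridden per call, for example:

    .. code-block:: python

        vectors = embed.embed_documents(["hello", "goodbye"], chunk_size=500)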

Async:
    .. code-block:: python

        vector = await embed.aembed_query(input_text)
        print(vector[:3])

        # multiple:
        # await embed.aembed_documents(input_texts)

    .. code-block:: python

        [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188]
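
Specify dimensions:
    With ``text-embedding-3`` and later models you can request shorter vectors;
    a minimal sketch:

    .. code-block:: python

        embed_256 = OpenAIEmbeddings(model="text-embedding-3-large", dimensions=256)
        len(embed_256.embed_query("hello"))  # expected length: 256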

    """

    client: Any = Field(default=None, exclude=True)  #: :meta private:
    async_client: Any = Field(default=None, exclude=True)  #: :meta private:
    model: str = "text-embedding-ada-002"
    dimensions: Optional[int] = None
    """The number of dimensions the resulting output embeddings should have.

    Only supported in `text-embedding-3` and later models.
    """
    # To support Azure OpenAI Service custom deployment names.
    deployment: Optional[str] = model
    openai_api_version: Optional[str] = Field(
        default_factory=from_env("OPENAI_API_VERSION", default=None),
        alias="api_version",
    )
    # To support Azure OpenAI Service custom endpoints.
    openai_api_base: Optional[str] = Field(
        alias="base_url", default_factory=from_env("OPENAI_API_BASE", default=None)
    )
    """Base URL path for API requests; leave blank if not using a proxy or service
    emulator."""
    openai_api_type: Optional[str] = Field(
        default_factory=from_env("OPENAI_API_TYPE", default=None)
    )
    # To support an explicit proxy for OpenAI.
    openai_proxy: Optional[str] = Field(
        default_factory=from_env("OPENAI_PROXY", default=None)
    )
    embedding_ctx_length: int = 8191
    """The maximum number of tokens to embed at once."""
    openai_api_key: Optional[SecretStr] = Field(
        alias="api_key",
        default_factory=secret_from_env("OPENAI_API_KEY", default=None),
    )
    """Automatically inferred from env var ``OPENAI_API_KEY`` if not provided."""
    openai_organization: Optional[str] = Field(
        alias="organization",
        default_factory=from_env(
            ["OPENAI_ORG_ID", "OPENAI_ORGANIZATION"], default=None
        ),
    )
    """Automatically inferred from env var ``OPENAI_ORG_ID`` if not provided."""
    allowed_special: Union[Literal["all"], set[str], None] = None
    disallowed_special: Union[Literal["all"], set[str], Sequence[str], None] = None
    chunk_size: int = 1000
    """Maximum number of texts to embed in each batch."""
    max_retries: int = 2
    """Maximum number of retries to make when generating."""
    request_timeout: Optional[Union[float, tuple[float, float], Any]] = Field(
        default=None, alias="timeout"
    )
    """Timeout for requests to the OpenAI embedding API."""
    headers: Any = None
    tiktoken_enabled: bool = True
    """Set this to False for non-OpenAI implementations of the embeddings API."""
    tiktoken_model_name: Optional[str] = None
    """The model name to pass to tiktoken when using this class."""
    show_progress_bar: bool = False
    """Whether to show a progress bar when embedding."""
    model_kwargs: dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for the ``create`` call that are not
    explicitly specified."""
    skip_empty: bool = False
    """Whether to skip empty strings when embedding, instead of raising an error."""
    default_headers: Union[Mapping[str, str], None] = None
    default_query: Union[Mapping[str, object], None] = None
    retry_min_seconds: int = 4
    """Minimum number of seconds to wait between retries."""
    retry_max_seconds: int = 20
    """Maximum number of seconds to wait between retries."""
    http_client: Union[Any, None] = None
    """Optional ``httpx.Client`` used for sync invocations."""
    http_async_client: Union[Any, None] = None
    """Optional ``httpx.AsyncClient`` used for async invocations."""
    check_embedding_ctx_length: bool = True
    """Whether to check the token length of inputs and automatically split inputs
    longer than ``embedding_ctx_length``."""

    model_config = ConfigDict(
        extra="forbid", populate_by_name=True, protected_namespaces=()
    )

    @model_validator(mode="before")
    @classmethod
    def build_extra(cls, values: dict[str, Any]) -> Any:
        """Build extra kwargs from additional params that were passed in."""
        all_required_field_names = get_pydantic_field_names(cls)
        extra = values.get("model_kwargs", {})
        for field_name in list(values):
            if field_name in extra:
                raise ValueError(f"Found {field_name} supplied twice.")
            if field_name not in all_required_field_names:
                warnings.warn(
                    f"""WARNING! {field_name} is not default parameter.
                    {field_name} was transferred to model_kwargs.
                    Please confirm that {field_name} is what you intended."""
                )
                extra[field_name] = values.pop(field_name)

        invalid_model_kwargs = all_required_field_names.intersection(extra.keys())
        if invalid_model_kwargs:
            raise ValueError(
                f"Parameters {invalid_model_kwargs} should be specified explicitly. "
                f"Instead they were passed in as part of `model_kwargs` parameter."
            )

        values["model_kwargs"] = extra
        return values

    @model_validator(mode="after")
    def validate_environment(self) -> Self:
        """Validate that api key and python package exist in environment."""
        if self.openai_api_type in ("azure", "azure_ad", "azuread"):
            raise ValueError(
                "If you are using Azure, "
                "please use the `AzureOpenAIEmbeddings` class."
            )
        client_params: dict = {
            "api_key": (
                self.openai_api_key.get_secret_value()
                if self.openai_api_key
                else None
            ),
            "organization": self.openai_organization,
            "base_url": self.openai_api_base,
            "timeout": self.request_timeout,
            "max_retries": self.max_retries,
            "default_headers": self.default_headers,
            "default_query": self.default_query,
        }
        if self.openai_proxy and (self.http_client or self.http_async_client):
            openai_proxy = self.openai_proxy
            http_client = self.http_client
            http_async_client = self.http_async_client
            raise ValueError(
                "Cannot specify 'openai_proxy' if one of "
                "'http_client'/'http_async_client' is already specified. "
                f"Received:\n{openai_proxy=}\n{http_client=}\n{http_async_client=}"
            )
        if not self.client:
            if self.openai_proxy and not self.http_client:
                try:
                    import httpx
                except ImportError as e:
                    raise ImportError(
                        "Could not import httpx python package. "
                        "Please install it with `pip install httpx`."
                    ) from e
                self.http_client = httpx.Client(proxy=self.openai_proxy)
            sync_specific = {"http_client": self.http_client}
            self.client = openai.OpenAI(**client_params, **sync_specific).embeddings
        if not self.async_client:
            if self.openai_proxy and not self.http_async_client:
                try:
                    import httpx
                except ImportError as e:
                    raise ImportError(
                        "Could not import httpx python package. "
                        "Please install it with `pip install httpx`."
                    ) from e
                self.http_async_client = httpx.AsyncClient(proxy=self.openai_proxy)
            async_specific = {"http_client": self.http_async_client}
            self.async_client = openai.AsyncOpenAI(
                **client_params, **async_specific
            ).embeddings
        return self

    @property
    def _invocation_params(self) -> dict[str, Any]:
        params: dict = {"model": self.model, **self.model_kwargs}
        if self.dimensions is not None:
            params["dimensions"] = self.dimensions
        return params

    def _tokenize(
        self, texts: list[str], chunk_size: int
    ) -> tuple[Iterable[int], list[Union[list[int], str]], list[int]]:
        """
        Take the input `texts` and `chunk_size` and return 3 iterables as a tuple:

        We have `batches`, where batches are sets of individual texts
        we want responses from the openai api. The length of a single batch is
        `chunk_size` texts.

        Each individual text is also split into multiple texts based on the
        `embedding_ctx_length` parameter (based on number of tokens).

        This function returns a 3-tuple of the following:

        _iter: An iterable of the starting index in `tokens` for each *batch*
        tokens: A list of tokenized texts, where each text has already been split
            into sub-texts based on the `embedding_ctx_length` parameter. In the
            case of tiktoken, this is a list of token arrays. In the case of
            HuggingFace transformers, this is a list of strings.
        indices: An iterable of the same length as `tokens` that maps each token-array
            to the index of the original text in `texts`.
        """
        tokens: list[Union[list[int], str]] = []
        indices: list[int] = []
        model_name = self.tiktoken_model_name or self.model

        # If tiktoken is disabled, fall back to a HuggingFace tokenizer.
        if not self.tiktoken_enabled:
            try:
                from transformers import AutoTokenizer
            except ImportError:
                raise ValueError(
                    "Could not import transformers python package. "
                    "This is needed for OpenAIEmbeddings to work without "
                    "`tiktoken`. Please install it with `pip install transformers`."
                )

            tokenizer = AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path=model_name
            )
            for i, text in enumerate(texts):
                # Tokenize the text using the HuggingFace tokenizer.
                tokenized = tokenizer.encode(text, add_special_tokens=False)

                # Split tokens into chunks respecting embedding_ctx_length and
                # convert each chunk back into text.
                for j in range(0, len(tokenized), self.embedding_ctx_length):
                    token_chunk = tokenized[j : j + self.embedding_ctx_length]
                    chunk_text = tokenizer.decode(token_chunk)
                    tokens.append(chunk_text)
                    indices.append(i)
        else:
            try:
                encoding = tiktoken.encoding_for_model(model_name)
            except KeyError:
                encoding = tiktoken.get_encoding("cl100k_base")
            encoder_kwargs: dict[str, Any] = {
                k: v
                for k, v in {
                    "allowed_special": self.allowed_special,
                    "disallowed_special": self.disallowed_special,
                }.items()
                if v is not None
            }
            for i, text in enumerate(texts):
                if self.model.endswith("001"):
                    # Replace newlines, which can negatively affect performance.
                    text = text.replace("\n", " ")
                if encoder_kwargs:
                    token = encoding.encode(text, **encoder_kwargs)
                else:
                    token = encoding.encode_ordinary(text)

                # Split tokens into chunks respecting embedding_ctx_length.
                for j in range(0, len(token), self.embedding_ctx_length):
                    tokens.append(token[j : j + self.embedding_ctx_length])
                    indices.append(i)

        if self.show_progress_bar:
            try:
                from tqdm.auto import tqdm

                _iter: Iterable = tqdm(range(0, len(tokens), chunk_size))
            except ImportError:
                _iter = range(0, len(tokens), chunk_size)
        else:
            _iter = range(0, len(tokens), chunk_size)
        return _iter, tokens, indices

    def _get_len_safe_embeddings(
        self,
        texts: list[str],
        *,
        engine: str,
        chunk_size: Optional[int] = None,
        **kwargs: Any,
    ) -> list[list[float]]:
        """
        Generate length-safe embeddings for a list of texts.

        This method handles tokenization and embedding generation, respecting the
        set embedding context length and chunk size. It supports both tiktoken
        and HuggingFace tokenizer based on the tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings for each input text.
        """
        _chunk_size = chunk_size or self.chunk_size
        client_kwargs = {**self._invocation_params, **kwargs}
        _iter, tokens, indices = self._tokenize(texts, _chunk_size)
        batched_embeddings: list[list[float]] = []
        for i in _iter:
            response = self.client.create(
                input=tokens[i : i + _chunk_size], **client_kwargs
            )
            if not isinstance(response, dict):
                response = response.model_dump()
            batched_embeddings.extend(r["embedding"] for r in response["data"])

        embeddings = _process_batched_chunked_embeddings(
            len(texts), tokens, batched_embeddings, indices, self.skip_empty
        )
        _cached_empty_embedding: Optional[list[float]] = None

        def empty_embedding() -> list[float]:
            # Lazily compute (and cache) the embedding of an empty string, used
            # for texts that produced no chunks.
            nonlocal _cached_empty_embedding
            if _cached_empty_embedding is None:
                average_embedded = self.client.create(input="", **client_kwargs)
                if not isinstance(average_embedded, dict):
                    average_embedded = average_embedded.model_dump()
                _cached_empty_embedding = average_embedded["data"][0]["embedding"]
            return _cached_empty_embedding

        return [e if e is not None else empty_embedding() for e in embeddings]

    async def _aget_len_safe_embeddings(
        self,
        texts: list[str],
        *,
        engine: str,
        chunk_size: Optional[int] = None,
        **kwargs: Any,
    ) -> list[list[float]]:
        """
        Asynchronously generate length-safe embeddings for a list of texts.

        This method handles tokenization and asynchronous embedding generation,
        respecting the set embedding context length and chunk size. It supports both
        `tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings for each input text.
        """
        _chunk_size = chunk_size or self.chunk_size
        client_kwargs = {**self._invocation_params, **kwargs}
        _iter, tokens, indices = await run_in_executor(
            None, self._tokenize, texts, _chunk_size
        )
        batched_embeddings: list[list[float]] = []
        for i in range(0, len(tokens), _chunk_size):
            response = await self.async_client.create(
                input=tokens[i : i + _chunk_size], **client_kwargs
            )
            if not isinstance(response, dict):
                response = response.model_dump()
            batched_embeddings.extend(r["embedding"] for r in response["data"])

        embeddings = _process_batched_chunked_embeddings(
            len(texts), tokens, batched_embeddings, indices, self.skip_empty
        )
        _cached_empty_embedding: Optional[list[float]] = None

        async def empty_embedding() -> list[float]:
            # Lazily compute (and cache) the embedding of an empty string, used
            # for texts that produced no chunks.
            nonlocal _cached_empty_embedding
            if _cached_empty_embedding is None:
                average_embedded = await self.async_client.create(
                    input="", **client_kwargs
                )
                if not isinstance(average_embedded, dict):
                    average_embedded = average_embedded.model_dump()
                _cached_empty_embedding = average_embedded["data"][0]["embedding"]
            return _cached_empty_embedding

        return [e if e is not None else await empty_embedding() for e in embeddings]

    def embed_documents(
        self, texts: list[str], chunk_size: Optional[int] = None, **kwargs: Any
    ) -> list[list[float]]:
        """Call out to OpenAI's embedding endpoint for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk
                size specified by the class.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            List of embeddings, one for each text.
        """
        chunk_size_ = chunk_size or self.chunk_size
        client_kwargs = {**self._invocation_params, **kwargs}
        if not self.check_embedding_ctx_length:
            embeddings: list[list[float]] = []
            for i in range(0, len(texts), chunk_size_):
                response = self.client.create(
                    input=texts[i : i + chunk_size_], **client_kwargs
                )
                if not isinstance(response, dict):
                    response = response.model_dump()
                embeddings.extend(r["embedding"] for r in response["data"])
            return embeddings

        # NOTE: to keep things simple, we assume the list may contain texts longer
        #       than the maximum context and use the length-safe embedding function.
        engine = cast(str, self.deployment)
        return self._get_len_safe_embeddings(
            texts, engine=engine, chunk_size=chunk_size, **kwargs
        )

    async def aembed_documents(
        self, texts: list[str], chunk_size: Optional[int] = None, **kwargs: Any
    ) -> list[list[float]]:
        """Call out to OpenAI's embedding endpoint async for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk
                size specified by the class.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            List of embeddings, one for each text.
        """
        chunk_size_ = chunk_size or self.chunk_size
        client_kwargs = {**self._invocation_params, **kwargs}
        if not self.check_embedding_ctx_length:
            embeddings: list[list[float]] = []
            for i in range(0, len(texts), chunk_size_):
                response = await self.async_client.create(
                    input=texts[i : i + chunk_size_], **client_kwargs
                )
                if not isinstance(response, dict):
                    response = response.model_dump()
                embeddings.extend(r["embedding"] for r in response["data"])
            return embeddings

        # NOTE: to keep things simple, we assume the list may contain texts longer
        #       than the maximum context and use the length-safe embedding function.
        engine = cast(str, self.deployment)
        return await self._aget_len_safe_embeddings(
            texts, engine=engine, chunk_size=chunk_size, **kwargs
        )

    def embed_query(self, text: str, **kwargs: Any) -> list[float]:
        """Call out to OpenAI's embedding endpoint for embedding query text.

        Args:
            text: The text to embed.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            Embedding for the text.
        """
        return self.embed_documents([text], **kwargs)[0]

    async def aembed_query(self, text: str, **kwargs: Any) -> list[float]:
        """Call out to OpenAI's embedding endpoint async for embedding query text.

        Args:
            text: The text to embed.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            Embedding for the text.
        """
        embeddings = await self.aembed_documents([text], **kwargs)
        return embeddings[0]