
    Ch                        S SK Jr  S SKrS SKrS SKrS SKJr   S SKJr  S SKrS SKrS SKJr  S SKJr  S SKJr  S SKJrJrJr  S	S
KJrJrJr  \R4                  " \5      r " S S\5      rg! \	 a	    S SK
Jr   NZf = f)    )annotationsN)PreTrainedTokenizerBase)Self)nn)tqdm)Module)fullnamehttp_getimport_from_string   )TransformersTokenizerWrapperWhitespaceTokenizerWordTokenizerc                      \ rS rSr% / SQrS\S'   SrS\S'     S     SS	 jjrS
 rSS jr	SS jr
SSS jjrS r\     S             SS jj5       r\SS\" 5       S4       SS jj5       rSrg)WordEmbeddings   tokenizer_classupdate_embeddingsmax_seq_length	list[str]config_keyszwordembedding_config.jsonstrconfig_file_nameFc                |   [         R                  R                  U 5        [        U[        5      (       a  [        U5      nO [        U[        5      (       d  [        S5      e[        U[        5      (       a  [        R                  " U5      n[        U[        R                  5      (       a  [        R                  " U5      nUR                  5       u  pVX`l        [         R                   " XV5      U l        U R"                  R%                  SU05        X0R"                  R&                  l        Xl        X0l        X@l        g )Nz>tokenizer must be a WordTokenizer or a HuggingFace tokenizer. weight)r   r   __init__
isinstancer   r   r   
ValueErrorlistnpasarrayndarraytorch
from_numpysizeembeddings_dimension	Embedding	emb_layerload_state_dictr   requires_grad	tokenizerr   r   )selfr,   embedding_weightsr   r   num_embeddingsr'   s          c/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/models/WordEmbeddings.pyr   WordEmbeddings.__init__   s     			4 i!8994Y?II}55]^^'.. "

+< ='44 % 0 01B C/@/E/E/G,$8!nK&&2C'DE.?+"!2,    c                b    U R                  US   5      nS nUR                  UUUS   S.5        U$ )N	input_idsattention_mask)token_embeddingscls_token_embeddingsr5   )r)   update)r-   featuresr6   
cls_tokenss       r0   forwardWordEmbeddings.forward:   sB    >>(;*?@
$4(2"*+;"<	
 r2   c                2   U Vs/ sH  o0R                   R                  " U40 UD6PM!     nnU Vs/ sH  n[        U5      PM     nn[        U5      n/ n/ n	U HI  nS/U[        U5      -
  -  n
UR	                  XZ-   5        U	R	                  S/[        U5      -  U
-   5        MK     [
        R                  " U[
        R                  S9[
        R                  " U	[
        R                  S9[
        R                  " U[
        R                  S9S.nU$ s  snf s  snf )Nr   r   )dtype)r4   r5   sentence_lengths)r,   tokenizelenmaxappendr$   tensorlong)r-   textskwargstexttokenized_textstokensr?   max_lenr4   attention_maskspaddingoutputs               r0   r@   WordEmbeddings.tokenizeF   s    OTUut>>224B6BuU6EFoFCKoF&'	%FcWs6{23GV-.""A3V#4w#>? & iuzzB#ll?%**M %-=UZZ P
 # VFs
   %DDc                    U R                   $ )N)r'   r-   s    r0   get_word_embedding_dimension+WordEmbeddings.get_word_embedding_dimensionZ   s    (((r2   c                z    U R                  U5        U R                  XS9  U R                  R                  U5        g )N)safe_serialization)save_configsave_torch_weightsr,   save)r-   output_pathrU   s      r0   rX   WordEmbeddings.save]   s3    %SK(r2   c                \    [        U R                  5      U R                  U R                  S.$ )Nr   )r	   r,   r   r   rQ   s    r0   get_config_dictWordEmbeddings.get_config_dictb   s*    '7!%!7!7"11
 	
r2   Nc                    UUUUUS.nU R                   " SSU0UD6n	[        U	R                  S5      5      n
U R                  " SSU0UD6nU
R	                  U5      nU R
                  " SSU0UD6nU " SXS   S.U	D6nU$ )N)	subfoldertokencache_folderrevisionlocal_files_onlymodel_name_or_pathr   zemb_layer.weight)r,   r.    )load_configr   popload_dir_pathloadload_torch_weights)clsrd   r_   r`   ra   rb   rc   rG   
hub_kwargsconfigr   tokenizer_local_pathr,   weightsmodels                  r0   ri   WordEmbeddings.loadi   s     #(  0

 U4FU*U,VZZ8I-JK"00eDVeZde#(()=>	((]<N]R\]aiCU;VaZ`ar2    c           
     ~   [         R                  SU 35        [        R                  R	                  U5      (       dB  [         R                  U S35        SU;   d  SU;   a  [        SU 35      eSU-   n[        Xa5        S n/ n/ n	UR                  S5      (       a  [        R                  " USS	S
9O	[        US	S
9 n
[        U
SSS9nU GH$  nUR                  5       R                  U5      nU(       d  [        U5      S:X  a  M;  US   nUcD  [        U5      S-
  nUR                  S5        U	R                  [        R                   " U5      5        [        U5      S-
  U:w  a  [         R#                  S5        M  [        R$                  " USS   Vs/ sH  n['        U5      PM     sn5      nU	R                  U5        UR                  U5        Uc  GM
  US:  d  GM  [        U5      U:  d  GM%    O   [        R(                  " U	5      n	UR+                  U5        U " XIUS9sS S S 5        $ s  snf ! , (       d  f       g = f)NzRead in embeddings file z, does not exist, try to download from server/\zEmbeddings file not found: zAhttps://public.ukp.informatik.tu-darmstadt.de/reimers/embeddings/z.gzrtutf8)encodingzLoad Word Embeddings
Embeddings)descunit   r   r   PADDING_TOKENz\ERROR: A line in the embeddings file had more or less  dimensions than expected. Skip token.)r,   r.   r   )loggerinfoospathexistsr   r
   endswithgzipopenr   rstripsplitrA   rC   r!   zeroserrorarrayfloatr"   	set_vocab)rk   embeddings_file_pathr   item_separatorr,   max_vocab_sizeurlr'   vocab
embeddingsfIniteratorliner   wordnumvectors                    r0   from_text_fileWordEmbeddings.from_text_file   s    	./C.DEFww~~233KK/00\]^**d6J.J #>?S>T!UVVUXllCS/#
 $,,U33 II*D6B*V<=@CC&<<PH ++N;UqQx'/+.u:>(LL1%%bhh/C&DE JN)* LLv qr"C#5:"CD!!&)T"!-.12DUVdId5 !8 J/J&\mnI= =4 #D5= =s1   ;CH.H)$*H.H.H..1H.)H..
H<)r)   r'   r   r,   r   )Fi@B )r,   z'WordTokenizer | PreTrainedTokenizerBaser   boolr   int)rF   r   )returnr   )T)rY   r   rU   r   ) NNNF)rd   r   r_   r   r`   zbool | str | Nonera   
str | Nonerb   r   rc   r   r   r   )r   r   r   r   r   r   r   z
int | None)__name__
__module____qualname____firstlineno__r   __annotations__r   r   r;   r@   rR   rX   r\   classmethodri   r   r   __static_attributes__re   r2   r0   r   r      s   WKW7c7 #(%-:-  	-
 -6
())

  #'#'#!&  !	
 !   
 4  #(!%'%)<o!<o  <o 	<o #<o <or2   r   )
__future__r   r   loggingr   transformersr   typingr   ImportErrortyping_extensionsnumpyr!   r$   r   r   #sentence_transformers.models.Moduler   sentence_transformers.utilr	   r
   r   r,   r   r   r   	getLoggerr   r~   r   re   r2   r0   <module>r      sm    "   	 0'     6 M M W W			8	$foV fo!  '&'s   A, ,A;:A;