
    ChX                        S SK Jr  S SKrS SKJr  S SKrS SKJr  S SKJr  SSK	J
r
  \R                  " \5      r " S S	\5      rg)
    )annotationsN)Literal)Tensor)InputModule   )WhitespaceTokenizerc                     ^  \ rS rSr% SrSrS\S'   / SQrS\S'   0 S	S
4       SU 4S jjjrSS jr	SS jr
S r S     SS jjrS
S.SS jjrSrU =r$ )BoW   zImplements a Bag-of-Words (BoW) model to derive sentence embeddings.

A weighting can be added to allow the generation of tf-idf vectors. The output vector has the size of the vocab.
Fboolsave_in_root)vocabword_weightsunknown_word_weightcumulative_term_frequency	list[str]config_keysr   Tc                  > [         TU ]  5         [        [        R	                  U5      5      nXl        X l        X0l        X@l        / U l	        SnU HV  nUnXb;   a  X&   nO,UR                  5       U;   a  X&R                  5          nOUS-  nU R                  R                  U5        MX     [        R                  U S[        U5       SU 35        [        U[!        5       SS9U l        [        U5      U l        g )Nr   r   z out of z0 words without a weighting value. Set weight to F)
stop_wordsdo_lower_case)super__init__listdictfromkeysr   r   r   r   weightslowerappendloggerinfolenr   set	tokenizersentence_embedding_dimension)	selfr   r   r   r   num_unknown_wordswordweight	__class__s	           X/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/models/BoW.pyr   BoW.__init__   s     	T]]5)*
(#6 )B& D(F#%+-%jjl3!Q&!LL'  	 !#e*5efyez{	
 -UsuTYZ,/J)    c                    U$ N )r%   featuress     r*   forwardBoW.forward;   s    r,   c                    U Vs/ sH  o0R                   R                  " U40 UD6PM!     nnU R                  U5      $ s  snf r.   )r#   tokenizeget_sentence_features)r%   textskwargstext	tokenizeds        r*   r4   BoW.tokenize?   s?    INO^^,,T<V<	O)))44 Ps   %=c                    U R                   $ r.   )r$   )r%   s    r*    get_sentence_embedding_dimension$BoW.get_sentence_embedding_dimensionC   s    000r,   c                X   / nU H  n[         R                  " U R                  5       [         R                  S9nU H@  nU R                  (       a  XV==   U R
                  U   -  ss'   M/  U R
                  U   XV'   MB     UR                  U5        M     S[         R                  " U5      0$ )N)dtypesentence_embedding)torchzerosr<   float32r   r   r   stack)r%   tokenized_textspad_seq_lengthvectorstokensvectortokens          r*   r5   BoW.get_sentence_featuresF   s     %F[[!F!F!HPUP]P]^F11MT\\%%88M$(LL$7FM	  
 NN6" & %ekk'&:;;r,   )safe_serializationc               &    U R                  U5        g r.   )save_config)r%   output_pathrL   argsr7   s        r*   saveBoW.saveV   s    %r,   )r   r$   r#   r   r   r   r   )r   r   r   zdict[str, float]r   floatr   r   )r0   zdict[str, Tensor])r6   r   returnz	list[int])r   )rE   zlist[list[int]]rF   intrT   z1dict[Literal['sentence_embedding'], torch.Tensor])rO   strrL   r   rT   None)__name__
__module____qualname____firstlineno____doc__r   __annotations__r   r   r1   r4   r<   r5   rQ   __static_attributes____classcell__)r)   s   @r*   r
   r
      s    
 L$jKj
 *,%&*. 7 7 ' 7 #	 7
 $( 7  7D51 GH<.<@C<	:<  HL & &r,   r
   )
__future__r   loggingtypingr   rA   r   (sentence_transformers.models.InputModuler   r#   r   	getLoggerrX   r   r
   r/   r,   r*   <module>re      s8    "     @ *			8	$G&+ G&r,   