
    Ch	                    Z    S r SSKJr  SSKrSSKJr  SSKJrJ	r	  SSK
Jr   " S S\5      rg)	a  
This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.

Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.

See this script for more details on how to use the new training API:
https://github.com/UKPLab/sentence-transformers/blob/master/examples/sentence_transformer/unsupervised_learning/TSDAE/train_stsb_tsdae.py
    )annotationsN)Dataset)NLTK_IMPORT_ERRORis_nltk_available)InputExamplec                  J    \ rS rSrSrS 4S
S jjrS rS r\SS j5       r	Sr
g	)DenoisingAutoEncoderDataset   a  
The DenoisingAutoEncoderDataset returns InputExamples in the format: texts=[noise_fn(sentence), sentence]
It is used in combination with the DenoisingAutoEncoderLoss: Here, a decoder tries to re-construct the
sentence without noise.

Args:
    sentences: A list of sentences
    noise_fn: A noise function: Given a string, it returns a string
        with noise, e.g. deleted words
c                ,    [         R                  U 5      $ N)r	   delete)ss    r/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/datasets/DenoisingAutoEncoderDataset.py<lambda>$DenoisingAutoEncoderDataset.<lambda>!   s    @[@b@bcd@e    c                    [        5       (       d3  [        [        R                  " U R                  R
                  5      5      eXl        X l        g r   )r   ImportErrorr   format	__class____name__	sentencesnoise_fn)selfr   r   s      r   __init__$DenoisingAutoEncoderDataset.__init__!   s6     ""/66t~~7N7NOPP" r   c                T    U R                   U   n[        U R                  U5      U/S9$ )N)texts)r   r   r   )r   itemsents      r   __getitem__'DenoisingAutoEncoderDataset.__getitem__(   s)    ~~d#4==#6"=>>r   c                ,    [        U R                  5      $ r   )lenr   )r   s    r   __len__#DenoisingAutoEncoderDataset.__len__,   s    4>>""r   c                T   SSK Jn  SSKJn  U" U 5      n[	        U5      nUS:X  a  U $ [
        R                  R                  U5      U:  n[        U5      S:X  a"  SU[
        R                  R                  U5      '   U" 5       R                  [
        R                  " U5      U   5      nU$ )Nr   )word_tokenize)TreebankWordDetokenizerT)nltkr(   nltk.tokenize.treebankr)   r$   nprandomrandsumchoice
detokenizearray)text	del_ratior(   r)   wordsnkeep_or_notwords_processeds           r   r   "DenoisingAutoEncoderDataset.delete0   s    &Bd#J6KiinnQ')3{q /3K		((+,13>>rxx{?[\r   )r   r   N)r   z	list[str])g333333?)r   
__module____qualname____firstlineno____doc__r   r!   r%   staticmethodr   __static_attributes__ r   r   r	   r	      s0    	 7f !?#  r   r	   )r=   
__future__r   numpyr,   torch.utils.datar   transformers.utils.import_utilsr   r   *sentence_transformers.readers.InputExampler   r	   r@   r   r   <module>rF      s'   	 #  $ P C)' )r   