
    Ch=                    b    S r SSKJr  SSKrSSKrSSKrSSKJr   " S S5      r " S S	\5      r	g)
a@  
This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.

Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.

Instead, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
    )annotationsN   )InputExamplec                  P    \ rS rSrSrSSSS\R                  SSS4S	 jrSS
 jrSr	g)STSDataReader   a)  Reads in the STS dataset. Each line contains two sentences (s1_col_idx, s2_col_idx) and one label (score_col_idx)

Default values expects a tab separated file with the first & second column the sentence pair and third column the score (0...1). Default config normalizes scores from 0...5 to 0...1
r   r      	T   c
                p    Xl         X@l        X l        X0l        XPl        X`l        Xpl        Xl        Xl        g )N)	dataset_folderscore_col_idx
s1_col_idx
s2_col_idx	delimiterquotingnormalize_scores	min_score	max_score)
selfr   r   r   r   r   r   r   r   r   s
             c/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/readers/STSDataReader.py__init__STSDataReader.__init__   s4     -*$$" 0""    c           
        [         R                  R                  U R                  U5      nUR	                  S5      (       a  [
        R                  " USSS9O	[        USS9 n[        R                  " X@R                  U R                  S9n/ n[        U5       H  u  px[        XR                     5      n	U R                  (       a(  XR                  -
  U R                   U R                  -
  -  n	XR"                     n
XR$                     nUR'                  [)        U[+        U5      -   X/U	S95        US:  d  M  [-        U5      U:  d  M    O   S	S	S	5        U$ ! , (       d  f       W$ = f)
zJfilename specified which data split to use (train.csv, dev.csv, test.csv).z.gzrtutf8)encodingzutf-8)r   r   )guidtextslabelr   N)ospathjoinr   endswithgzipopencsvreaderr   r   	enumeratefloatr   r   r   r   r   r   appendr   strlen)r   filenamemax_examplesfilepathfIndataexamplesidrowscores1s2s               r   get_examplesSTSDataReader.get_examples0   s)   77<< 3 3X>   '' IIhv6h12 ::c^^T\\RDH$T?c"4"456(("^^38WXE))(SW2DRH\a bc!#H(E +2$ %2 2$ s   !CE8E	E
E&)	r   r   r   r   r   r   r   r   r   N)r   )
__name__
__module____qualname____firstlineno____doc__r(   
QUOTE_NONEr   r:   __static_attributes__ r   r   r   r      s/     #,r   r   c                  T   ^  \ rS rSrSrSSSS\R                  SSS4U 4S	 jjrS
rU =r	$ )STSBenchmarkDataReaderI   zReader especially for the STS benchmark dataset. There, the sentences are in column 5 and 6, the score is in column 4.
Scores are normalized from 0...5 to 0...1
r         r
   Tr   c
                0   > [         T
U ]  UUUUUUUUU	S9	  g )N)	r   r   r   r   r   r   r   r   r   )superr   )r   r   r   r   r   r   r   r   r   r   	__class__s             r   r   STSBenchmarkDataReader.__init__N   s2     	)!!'- 	 
	
r   rC   )
r<   r=   r>   r?   r@   r(   rA   r   rB   __classcell__)rK   s   @r   rE   rE   I   s0     
 
r   rE   )
r@   
__future__r   r(   r&   r"    r   r   rE   rC   r   r   <module>rP      s2    # 
  	 2 2j
] 
r   