import logging
import os
import warnings
from contextlib import nullcontext
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from torch import nn
from torch.utils.data import BatchSampler, ConcatDataset, DataLoader, SubsetRandomSampler
from transformers import EvalPrediction, PreTrainedTokenizerBase, Trainer, TrainerCallback
from transformers.data.data_collator import DataCollator
from transformers.integrations import WandbCallback
from transformers.trainer import TRAINING_ARGS_NAME
from transformers.trainer_utils import EvalLoopOutput
from transformers.training_args import ParallelMode

from sentence_transformers.data_collator import SentenceTransformerDataCollator
from sentence_transformers.evaluation import SentenceEvaluator, SequentialEvaluator
from sentence_transformers.losses.CoSENTLoss import CoSENTLoss
from sentence_transformers.model_card import ModelCardCallback
from sentence_transformers.models.Transformer import Transformer
from sentence_transformers.sampler import (
    DefaultBatchSampler,
    GroupByLabelBatchSampler,
    NoDuplicatesBatchSampler,
    ProportionalBatchSampler,
    RoundRobinBatchSampler,
)
from sentence_transformers.training_args import (
    BatchSamplers,
    MultiDatasetBatchSamplers,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.util import disable_logging, is_datasets_available, is_training_available

if is_datasets_available():
    from datasets import Dataset, DatasetDict

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from sentence_transformers.SentenceTransformer import SentenceTransformer


class SentenceTransformerTrainer(Trainer):
    """
    SentenceTransformerTrainer is a simple but feature-complete training and eval loop for PyTorch
    based on the 🤗 Transformers :class:`~transformers.Trainer`.

    This trainer integrates support for various :class:`transformers.TrainerCallback` subclasses, such as:

    - :class:`~transformers.integrations.WandbCallback` to automatically log training metrics to W&B if `wandb` is installed
    - :class:`~transformers.integrations.TensorBoardCallback` to log training metrics to TensorBoard if `tensorboard` is accessible.
    - :class:`~transformers.integrations.CodeCarbonCallback` to track the carbon emissions of your model during training if `codecarbon` is installed.

        - Note: These carbon emissions will be included in your automatically generated model card.

    See the Transformers `Callbacks <https://huggingface.co/docs/transformers/main/en/main_classes/callback>`_
    documentation for more information on the integrated callbacks and how to write your own callbacks.

    Args:
        model (:class:`~sentence_transformers.SentenceTransformer`, *optional*):
            The model to train, evaluate or use for predictions. If not provided, a `model_init` must be passed.
        args (:class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments`, *optional*):
            The arguments to tweak for training. Will default to a basic instance of
            :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments` with the
            `output_dir` set to a directory named *tmp_trainer* in the current directory if not provided.
        train_dataset (Union[:class:`datasets.Dataset`, :class:`datasets.DatasetDict`, Dict[str, :class:`datasets.Dataset`]], *optional*):
            The dataset to use for training. Must have a format accepted by your loss function, see
            `Training Overview > Dataset Format <../../../docs/sentence_transformer/training_overview.html#dataset-format>`_.
        eval_dataset (Union[:class:`datasets.Dataset`, :class:`datasets.DatasetDict`, Dict[str, :class:`datasets.Dataset`]], *optional*):
            The dataset to use for evaluation. Must have a format accepted by your loss function, see
            `Training Overview > Dataset Format <../../../docs/sentence_transformer/training_overview.html#dataset-format>`_.
        loss (Optional[Union[:class:`torch.nn.Module`, Dict[str, :class:`torch.nn.Module`],
            Callable[[:class:`~sentence_transformers.SentenceTransformer`], :class:`torch.nn.Module`],
            Dict[str, Callable[[:class:`~sentence_transformers.SentenceTransformer`], :class:`torch.nn.Module`]]]], *optional*):
            The loss function to use for training. Can either be a loss class instance, a dictionary mapping dataset names to
            loss class instances, a function that returns a loss class instance given a model, or a dictionary mapping
            dataset names to functions that return a loss class instance given a model. In practice, the latter two
            are primarily used for hyper-parameter optimization. Will default to
            :class:`~sentence_transformers.losses.CoSENTLoss` if no ``loss`` is provided.
        evaluator (Union[:class:`~sentence_transformers.evaluation.SentenceEvaluator`,
            List[:class:`~sentence_transformers.evaluation.SentenceEvaluator`]], *optional*):
            The evaluator instance for useful evaluation metrics during training. You can use an ``evaluator`` with
            or without an ``eval_dataset``, and vice versa. Generally, the metrics that an ``evaluator`` returns
            are more useful than the loss value returned from the ``eval_dataset``. A list of evaluators will be
            wrapped in a :class:`~sentence_transformers.evaluation.SequentialEvaluator` to run them sequentially.
        callbacks (List[:class:`transformers.TrainerCallback`], *optional*):
            A list of callbacks to customize the training loop. These are added to the list of default callbacks
            detailed in the `Callbacks documentation <https://huggingface.co/docs/transformers/main_classes/callback>`_.

            If you want to remove one of the default callbacks, use the :meth:`~transformers.Trainer.remove_callback` method.
        optimizers (`Tuple[:class:`torch.optim.Optimizer`, :class:`torch.optim.lr_scheduler.LambdaLR`]`, *optional*, defaults to `(None, None)`):
            A tuple containing the optimizer and the scheduler to use. Will default to an instance of :class:`torch.optim.AdamW`
            on your model and a scheduler given by :func:`transformers.get_linear_schedule_with_warmup` controlled by `args`.

    Important attributes:

        - **model** -- Always points to the core model. If using a transformers model, it will be a [`PreTrainedModel`]
          subclass.
        - **model_wrapped** -- Always points to the most external model in case one or more other modules wrap the
          original model. This is the model that should be used for the forward pass. For example, under `DeepSpeed`,
          the inner model is wrapped in `DeepSpeed` and then again in `torch.nn.DistributedDataParallel`. If the inner
          model hasn't been wrapped, then `self.model_wrapped` is the same as `self.model`.
        - **is_model_parallel** -- Whether or not a model has been switched to a model parallel mode (different from
          data parallelism, this means some of the model layers are split on different GPUs).
        - **place_model_on_device** -- Whether or not to automatically place the model on the device - it will be set
          to `False` if model parallel or deepspeed is used, or if the default
          `TrainingArguments.place_model_on_device` is overridden to return `False`.
        - **is_in_train** -- Whether or not a model is currently running `train` (e.g. when `evaluate` is called while
          in `train`)
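
    The following is a minimal, illustrative training sketch; the checkpoint, dataset columns, and
    loss chosen below are assumptions for demonstration rather than requirements.

    Example::

        from datasets import Dataset

        from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
        from sentence_transformers.losses import MultipleNegativesRankingLoss
        from sentence_transformers.training_args import SentenceTransformerTrainingArguments

        model = SentenceTransformer("all-MiniLM-L6-v2")
        # Illustrative (anchor, positive) pairs; any column layout works as long as
        # the chosen loss accepts the resulting number of text columns.
        train_dataset = Dataset.from_dict(
            {
                "anchor": ["It's nice weather outside today.", "He drove to work."],
                "positive": ["It's so sunny.", "He took the car to the office."],
            }
        )
        args = SentenceTransformerTrainingArguments(output_dir="tmp_trainer")
        trainer = SentenceTransformerTrainer(
            model=model,
            args=args,
            train_dataset=train_dataset,
            loss=MultipleNegativesRankingLoss(model),
        )
        trainer.train()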

    NNNmodelr-   argstrain_datasetr*   r+   eval_datasetloss	evaluatordata_collator	tokenizer
model_initcompute_metrics	callbacks
optimizerspreprocess_logits_for_metricsreturnc                    s  t  std|d u rd}td| d t|d}n	t|ts%td d u r9|	d ur5|	_  ntd|	d urCt	
dt |	_td	d }|jr[ jjs[ j|j |d u rht jtrh j}|d u rrt jd
}t|trt|tst|}t|trt|tst|}t jjrd n ||||||	|
|||d d_   tdd jjD rtj dd |d u rtd t!j"}t|tr, fdd|# D _$t%ddg||gD ]A\}}|d u rqt|tstd| dt&|' t&|'   }r*td| dt(| dt)|dkr!dnd d| d	qn*| _$|d urBt|t+sBt,|}|_-t.|}/| |0j1j2j3j" d S ) NzTo train a SentenceTransformer model, you need to install the `accelerate` and `datasets` modules. You can do so with the `train` extra:
pip install -U "sentence-transformers[train]"tmp_trainerz1No `TrainingArguments` passed, using `output_dir=z`.)
output_dirzEPlease use `TrainingArguments` imported from `sentence_transformers`.z<`Trainer` requires either a `model` or `model_init` argumentz`Trainer` requires either a `model` or `model_init` argument, but not both. `model_init` will overwrite your model when calling the `train` method. This will become a fatal error in the next release.unused)tokenize_fn)r0   r1   r6   r2   r3   r7   r8   r9   r:   r;   r<   Tc                 S   s   g | ]}t |tqS  )
isinstancer   ).0callbackrB   rB   [/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/sentence_transformers/trainer.py
<listcomp>   s    z7SentenceTransformerTrainer.__init__.<locals>.<listcomp>WANDB_PROJECTzsentence-transformersz@No `loss` passed, using `losses.CoSENTLoss` as a default option.c                    s   i | ]\}}| | qS rB   )prepare_loss)rD   dataset_nameloss_fnr0   selfrB   rF   
<dictcomp>   s    z7SentenceTransformerTrainer.__init__.<locals>.<dictcomp>trainevalz,If the provided `loss` is a dict, then the `z"_dataset` must be a `DatasetDict`.z:If the provided `loss` is a dict, then all keys from the `z;_dataset` dictionary must occur in `loss` also. Currently, z occur   s z in `z_dataset` but not in `loss`.)4r)   RuntimeErrorloggerinfor&   rC   
ValueErrorr8   call_model_initwarningswarnFutureWarningto_dicthub_model_idmodel_card_datamodel_idset_model_idr7   r   r   tokenizedictr+   r*   super__init__can_return_lossanycallback_handlerr:   osenviron
setdefaultr   r0   itemsr4   zipsetkeyssortedlenrI   r   r   r5   r   add_callbackon_init_endr1   statecontrol)rM   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r?   default_args_dictrJ   datasetmissingmodel_card_callback	__class__rL   rF   rd   s   s   







    def call_model_init(self, trial=None) -> "SentenceTransformer":
        model = super().call_model_init(trial=trial)
        # If the Trainer has already stored a loss, the freshly initialized model
        # must be injected into that loss
        if not hasattr(self, "loss"):
            return model

        if isinstance(self.loss, dict):
            for key, loss_fn in self.loss.items():
                # If a loss function is not yet initialized, initialize it with the new model
                if not isinstance(loss_fn, torch.nn.Module):
                    self.loss[key] = loss_fn(model)
                elif hasattr(loss_fn, "model"):
                    self.loss[key] = self.override_model_in_loss(loss_fn, model)
        elif not isinstance(self.loss, torch.nn.Module):
            self.loss = self.loss(model)
        elif hasattr(self.loss, "model"):
            self.loss = self.override_model_in_loss(self.loss, model)
        return model
    def override_model_in_loss(self, loss: torch.nn.Module, model: "SentenceTransformer") -> torch.nn.Module:
        from sentence_transformers import SentenceTransformer

        # Recursively replace any stored SentenceTransformer child module named "model"
        for name, child in loss.named_children():
            if name == "model" and isinstance(child, SentenceTransformer):
                loss.model = model
            elif isinstance(child, torch.nn.Module):
                setattr(loss, name, self.override_model_in_loss(child, model))
        return loss

    def prepare_loss(
        self,
        loss: Union[Callable[["SentenceTransformer"], torch.nn.Module], torch.nn.Module],
        model: "SentenceTransformer",
    ) -> torch.nn.Module:
        if isinstance(loss, torch.nn.Module):
            return loss.to(model.device)
        return loss(model).to(model.device)

    def add_dataset_name_column(self, dataset_dict: "DatasetDict") -> "DatasetDict":
        for key, dataset in dataset_dict.items():
            if "dataset_name" not in dataset.column_names:
                dataset_dict[key] = dataset.add_column("dataset_name", [key] * len(dataset))
        return dataset_dict
    def compute_loss(
        self,
        model: "SentenceTransformer",
        inputs: Dict[str, Union[torch.Tensor, Any]],
        return_outputs: bool = False,
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, Dict[str, Any]]]:
        """
        Computes the loss for the SentenceTransformer model.

        It uses ``self.loss`` to compute the loss, which can be a single loss function or a dictionary of loss functions
        for different datasets. If the loss is a dictionary, the dataset name is expected to be passed in the inputs
        under the key "dataset_name". This is done automatically in the ``add_dataset_name_column`` method.
        Note that even if ``return_outputs = True``, the outputs will be empty, as the SentenceTransformers losses do not
        return outputs.

        Args:
            model (SentenceTransformer): The SentenceTransformer model.
            inputs (Dict[str, Union[torch.Tensor, Any]]): The input data for the model.
            return_outputs (bool, optional): Whether to return the outputs along with the loss. Defaults to False.

        Returns:
            Union[torch.Tensor, Tuple[torch.Tensor, Dict[str, Any]]]: The computed loss. If `return_outputs` is True, returns a tuple of loss and outputs. Otherwise, returns only the loss.
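
        With a dataset dictionary and a matching loss dictionary, each batch is routed to the loss
        of its originating dataset. The dataset names and losses below are illustrative assumptions.

        Example::

            from sentence_transformers.losses import CoSENTLoss, MultipleNegativesRankingLoss

            trainer = SentenceTransformerTrainer(
                model=model,
                train_dataset={"quora": quora_pairs, "stsb": stsb_pairs},
                loss={
                    "quora": MultipleNegativesRankingLoss(model),
                    "stsb": CoSENTLoss(model),
                },
            )
            # Each batch then carries a "dataset_name" key, and this method picks
            # the matching loss from ``self.loss``.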
        """
        dataset_name = inputs.pop("dataset_name", None)
        features, labels = self.collect_features(inputs)
        loss_fn = self.loss

        if isinstance(loss_fn, dict) and dataset_name:
            loss_fn = loss_fn[dataset_name]

        # Insert the wrapped (e.g. distributed) model into the loss function,
        # if the loss stores the model. Only called once per process.
        if (
            self.args.parallel_mode != ParallelMode.NOT_PARALLEL
            and hasattr(model, "module")
            and hasattr(loss_fn, "model")
        ):
            loss_fn = self.override_model_in_loss(loss_fn, model)

        loss = loss_fn(features, labels)
        if return_outputs:
            # Sentence Transformers losses do not return outputs, so an empty dict is returned instead
            return loss, {}
        return loss

    def collect_features(
        self, inputs: Dict[str, Union[torch.Tensor, Any]]
    ) -> Tuple[List[Dict[str, torch.Tensor]], Optional[torch.Tensor]]:
        """Turn the inputs from the dataloader into the separate model inputs & the labels.

        Example::

            >>> list(inputs.keys())
            ['return_loss', 'label', 'sentence_0_input_ids', 'sentence_0_token_type_ids', 'sentence_0_attention_mask', 'sentence_1_input_ids', 'sentence_1_token_type_ids', 'sentence_1_attention_mask']
            >>> features, labels = self.collect_features(inputs)
            >>> len(features)
            2
            >>> list(features[0].keys())
            ['input_ids', 'token_type_ids', 'attention_mask']
            >>> list(features[1].keys())
            ['input_ids', 'token_type_ids', 'attention_mask']
            >>> torch.equal(labels, inputs["label"])
            True
        
_input_idsN	input_ids_sentence_embeddingsentence_embedding_pixel_valuespixel_valuesc                    s,   i | ]\}}|  r|t d  |qS r   )
startswithrp   )rD   r   valueprefixrB   rF   rN   q  s   , z?SentenceTransformerTrainer.collect_features.<locals>.<dictcomp>label)endswithrp   appendrk   get)rM   r   r   columnr   rB   r   rF   r   R  s   


z+SentenceTransformerTrainer.collect_featuresrP   ignore_keysmetric_key_prefixc                    sB   |d ur|n| j }t|trt| jtr| |}t |||S r   )r3   rC   r+   r4   rb   r   rc   evaluate)rM   r3   r   r   ry   rB   rF   r   u  s   
z#SentenceTransformerTrainer.evaluate
dataloaderdescriptionprediction_loss_onlyc           	         s  t  j|||||d}| jd u r|S | jr4t| jtr4|dr4|dd  t| j	 d kr2d}n|S | 
 r;t nttj | | j}W d    n1 sQw   Y  t|ts_d|i}t|	 D ]}|| ds{|||| d| < qe|j| |S )N)r   r   r   r   r   eval_   r   rP   r5   _)rc   evaluation_loopr5   is_in_trainrC   r3   rb   r   listrn   is_local_process_zeror   r'   loggingINFOr0   r   metricsupdate)	rM   r   r   r   r   r   outputevaluator_metricsr   ry   rB   rF   r     s0   
	
    def _load_best_model(self) -> None:
        # Loading the best model is only supported for `transformers`-based models
        if not isinstance(self.model[0], Transformer):
            logger.info("Could not load best model, as the model is not a `transformers`-based model.")
            return

        try:
            if checkpoint := self.state.best_model_checkpoint:
                step = checkpoint.rsplit("-", 1)[-1]
                self.model.model_card_data.set_best_model_step(int(step))
        except Exception:
            pass

        # Override the model with the `transformers`-based auto_model, and restore the original
        # SentenceTransformer model with the loaded `transformers` model afterwards
        full_model = self.model
        self.model = self.model[0].auto_model
        try:
            return super()._load_best_model()
        finally:
            loaded_auto_model = self.model
            self.model = full_model
            self.model[0].auto_model = loaded_auto_model

    def validate_column_names(self, dataset: "Dataset", dataset_name: Optional[str] = None) -> None:
        if overlap := set(dataset.column_names) & {"return_loss", "dataset_name"}:
            raise ValueError(
                f"The following column names are invalid in your {dataset_name + ' ' if dataset_name else ''}"
                f"dataset: {list(overlap)}. Avoid using these column names, as they are reserved for internal use."
            )

    def get_batch_sampler(
        self,
        dataset: "Dataset",
        batch_size: int,
        drop_last: bool,
        valid_label_columns: Optional[List[str]] = None,
        generator: Optional[torch.Generator] = None,
    ) -> BatchSampler:
        if self.args.batch_sampler == BatchSamplers.NO_DUPLICATES:
            return NoDuplicatesBatchSampler(
                dataset=dataset,
                batch_size=batch_size,
                drop_last=drop_last,
                valid_label_columns=valid_label_columns,
                generator=generator,
            )

        if self.args.batch_sampler == BatchSamplers.GROUP_BY_LABEL:
            return GroupByLabelBatchSampler(
                dataset=dataset,
                batch_size=batch_size,
                drop_last=drop_last,
                valid_label_columns=valid_label_columns,
            )

        if self.args.batch_sampler == BatchSamplers.BATCH_SAMPLER:
            return DefaultBatchSampler(
                SubsetRandomSampler(range(len(dataset)), generator=generator),
                batch_size=batch_size,
                drop_last=drop_last,
            )

    def get_multi_dataset_batch_sampler(
        self,
        dataset: ConcatDataset,
        batch_samplers: List[BatchSampler],
        generator: Optional[torch.Generator] = None,
        seed: Optional[int] = 0,
    ) -> BatchSampler:
        if self.args.multi_dataset_batch_sampler == MultiDatasetBatchSamplers.ROUND_ROBIN:
            return RoundRobinBatchSampler(
                dataset=dataset,
                batch_samplers=batch_samplers,
                generator=generator,
                seed=seed,
            )

        if self.args.multi_dataset_batch_sampler == MultiDatasetBatchSamplers.PROPORTIONAL:
            return ProportionalBatchSampler(
                dataset=dataset,
                batch_samplers=batch_samplers,
                generator=generator,
                seed=seed,
            )

    def get_train_dataloader(self) -> DataLoader:
        """
        Returns the training [`~torch.utils.data.DataLoader`].

        Will use no sampler if `train_dataset` does not implement `__len__`, a random sampler (adapted to distributed
        training if necessary) otherwise.

        Subclass and override this method if you want to inject some custom behavior.
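
        A sketch of that extension point; the subclass and logging below are illustrative.

        Example::

            class LoggingTrainer(SentenceTransformerTrainer):
                def get_train_dataloader(self) -> DataLoader:
                    dataloader = super().get_train_dataloader()
                    logger.info(f"Training with {len(dataloader)} batches per epoch.")
                    return dataloader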
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        train_dataset = self.train_dataset
        data_collator = self.data_collator

        generator = torch.Generator()
        if self.args.seed:
            generator.manual_seed(self.args.seed)

        if isinstance(train_dataset, DatasetDict):
            for dataset_name, dataset in train_dataset.items():
                self.validate_column_names(dataset, dataset_name=dataset_name)

            if isinstance(self.loss, dict):
                train_dataset = self.add_dataset_name_column(train_dataset)

            batch_samplers = [
                self.get_batch_sampler(
                    dataset,
                    batch_size=self.args.per_device_train_batch_size,
                    drop_last=self.args.dataloader_drop_last,
                    valid_label_columns=data_collator.valid_label_columns,
                    generator=generator,
                )
                for dataset in train_dataset.values()
            ]

            train_dataset = ConcatDataset(train_dataset.values())
            batch_sampler = self.get_multi_dataset_batch_sampler(
                dataset=train_dataset,
                batch_samplers=batch_samplers,
                generator=generator,
                seed=self.args.seed,
            )
        else:
            batch_sampler = self.get_batch_sampler(
                train_dataset,
                batch_size=self.args.train_batch_size,
                drop_last=self.args.dataloader_drop_last,
                valid_label_columns=data_collator.valid_label_columns,
                generator=generator,
            )

        dataloader_params = {
            "collate_fn": data_collator,
            "num_workers": self.args.dataloader_num_workers,
            "pin_memory": self.args.dataloader_pin_memory,
            "persistent_workers": self.args.dataloader_persistent_workers,
            "prefetch_factor": self.args.dataloader_prefetch_factor,
            "batch_sampler": batch_sampler,
        }

        # With `even_batches` enabled, the Accelerator would reuse the first samples to pad out
        # the last batch, which interferes with multi-dataset batches, so it is disabled here
        self.accelerator.even_batches = False
        self._train_dataloader = self.accelerator.prepare(DataLoader(train_dataset, **dataloader_params))
        return self._train_dataloader

    def get_eval_dataloader(self, eval_dataset: Optional["Dataset"] = None) -> DataLoader:
        """
        Returns the evaluation [`~torch.utils.data.DataLoader`].

        Subclass and override this method if you want to inject some custom behavior.

        Args:
            eval_dataset (`torch.utils.data.Dataset`, *optional*):
                If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted
                by the `model.forward()` method are automatically removed. It must implement `__len__`.
        """
        if eval_dataset is None and self.eval_dataset is None:
            # Avoid crashing if no eval_dataset was provided, but an evaluator is set
            if self.evaluator is not None:
                return DataLoader([])
            raise ValueError("Trainer: evaluation requires an eval_dataset.")

        eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
        data_collator = self.data_collator

        generator = torch.Generator()
        if self.args.seed:
            generator.manual_seed(self.args.seed)

        if isinstance(eval_dataset, DatasetDict):
            if isinstance(self.loss, dict):
                eval_dataset = self.add_dataset_name_column(eval_dataset)

            batch_samplers = [
                self.get_batch_sampler(
                    dataset,
                    batch_size=self.args.per_device_eval_batch_size,
                    drop_last=self.args.dataloader_drop_last,
                    valid_label_columns=data_collator.valid_label_columns,
                    generator=generator,
                )
                for dataset in eval_dataset.values()
            ]

            eval_dataset = ConcatDataset(eval_dataset.values())
            batch_sampler = self.get_multi_dataset_batch_sampler(
                dataset=eval_dataset,
                batch_samplers=batch_samplers,
                generator=generator,
                seed=self.args.seed,
            )
        else:
            batch_sampler = self.get_batch_sampler(
                eval_dataset,
                batch_size=self.args.eval_batch_size,
                drop_last=self.args.dataloader_drop_last,
                valid_label_columns=data_collator.valid_label_columns,
                generator=generator,
            )

        dataloader_params = {
            "collate_fn": data_collator,
            "num_workers": self.args.dataloader_num_workers,
            "pin_memory": self.args.dataloader_pin_memory,
            "persistent_workers": self.args.dataloader_persistent_workers,
            "prefetch_factor": self.args.dataloader_prefetch_factor,
            "batch_sampler": batch_sampler,
        }

        # For evaluation, `even_batches` stays enabled, as disabling it can cause hangs
        self.accelerator.even_batches = True
        return self.accelerator.prepare(DataLoader(eval_dataset, **dataloader_params))

    def get_test_dataloader(self, test_dataset: "Dataset") -> DataLoader:
        """
        Returns the test [`~torch.utils.data.DataLoader`].

        Subclass and override this method if you want to inject some custom behavior.

        Args:
            test_dataset (`torch.utils.data.Dataset`, *optional*):
                The test dataset to use. If it is a [`~datasets.Dataset`], columns not accepted by the
                `model.forward()` method are automatically removed. It must implement `__len__`.
        """
        data_collator = self.data_collator

        generator = torch.Generator()
        if self.args.seed:
            generator.manual_seed(self.args.seed)

        if isinstance(test_dataset, DatasetDict):
            for dataset_name, dataset in test_dataset.items():
                self.validate_column_names(dataset, dataset_name=dataset_name)

            if isinstance(self.loss, dict):
                test_dataset = self.add_dataset_name_column(test_dataset)

            batch_samplers = [
                self.get_batch_sampler(
                    dataset,
                    batch_size=self.args.per_device_train_batch_size,
                    drop_last=self.args.dataloader_drop_last,
                    valid_label_columns=data_collator.valid_label_columns,
                    generator=generator,
                )
                for dataset in test_dataset.values()
            ]

            test_dataset = ConcatDataset(test_dataset.values())
            batch_sampler = self.get_multi_dataset_batch_sampler(
                dataset=test_dataset,
                batch_samplers=batch_samplers,
                generator=generator,
                seed=self.args.seed,
            )
        else:
            batch_sampler = self.get_batch_sampler(
                test_dataset,
                batch_size=self.args.train_batch_size,
                drop_last=self.args.dataloader_drop_last,
                valid_label_columns=data_collator.valid_label_columns,
                generator=generator,
            )

        dataloader_params = {
            "collate_fn": data_collator,
            "num_workers": self.args.dataloader_num_workers,
            "pin_memory": self.args.dataloader_pin_memory,
            "persistent_workers": self.args.dataloader_persistent_workers,
            "prefetch_factor": self.args.dataloader_prefetch_factor,
            "batch_sampler": batch_sampler,
        }

        self.accelerator.even_batches = False
        self._train_dataloader = self.accelerator.prepare(DataLoader(test_dataset, **dataloader_params))
        return self._train_dataloader

    def _save(self, output_dir: Optional[str] = None, state_dict=None) -> None:
        output_dir = output_dir if output_dir is not None else self.args.output_dir
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f"Saving model checkpoint to {output_dir}")

        self.model.save_pretrained(output_dir, safe_serialization=self.args.save_safetensors)

        if self.tokenizer is not None:
            self.tokenizer.save_pretrained(output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))

    def _load_from_checkpoint(self, checkpoint_path: str) -> None:
        from sentence_transformers import SentenceTransformer

        loaded_model = SentenceTransformer(checkpoint_path)
        self.model.load_state_dict(loaded_model.state_dict())

    def create_model_card(
        self,
        language: Optional[str] = None,
        license: Optional[str] = None,
        tags: Optional[Union[str, List[str]]] = None,
        model_name: Optional[str] = None,
        finetuned_from: Optional[str] = None,
        tasks: Optional[Union[str, List[str]]] = None,
        dataset_tags: Optional[Union[str, List[str]]] = None,
        dataset: Optional[Union[str, List[str]]] = None,
        dataset_args: Optional[Union[str, List[str]]] = None,
        **kwargs,
    ) -> None:
        if not self.is_world_process_zero():
            return

        if language:
            self.model.model_card_data.set_language(language)
        if license:
            self.model.model_card_data.set_license(license)
        if tags:
            self.model.model_card_data.add_tags(tags)

        self.model._create_model_card(self.args.output_dir, model_name=model_name)