import logging
from dataclasses import dataclass, field
from typing import Union

from transformers import TrainingArguments as TransformersTrainingArguments
from transformers.training_args import ParallelMode
from transformers.utils import ExplicitEnum

logger = logging.getLogger(__name__)


class BatchSamplers(ExplicitEnum):
    """
    Stores the acceptable string identifiers for batch samplers.

    The batch sampler is responsible for determining how samples are grouped into batches during training.
    Valid options are:

    - ``BatchSamplers.BATCH_SAMPLER``: The default PyTorch batch sampler.
    - ``BatchSamplers.NO_DUPLICATES``: Ensures no duplicate samples in a batch.
    - ``BatchSamplers.GROUP_BY_LABEL``: Ensures each batch has 2+ samples from the same label.
    batch_samplerno_duplicatesgroup_by_labelN)__name__
__module____qualname____doc__BATCH_SAMPLERNO_DUPLICATESGROUP_BY_LABEL r   r   a/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/sentence_transformers/training_args.pyr      s
    r   c                   @   s   e Zd ZdZdZdZdS )MultiDatasetBatchSamplersa  
    Stores the acceptable string identifiers for multi-dataset batch samplers.

    The multi-dataset batch sampler is responsible for determining in what order batches are sampled from multiple
    datasets during training. Valid options are:

    - ``MultiDatasetBatchSamplers.ROUND_ROBIN``: Round-robin sampling from each dataset until one is exhausted.
      With this strategy, it's likely that not all samples from each dataset are used, but each dataset is sampled
      from equally.
    - ``MultiDatasetBatchSamplers.PROPORTIONAL``: Sample from each dataset in proportion to its size [default].
      With this strategy, all samples from each dataset are used and larger datasets are sampled from more frequently.
    round_robinproportionalN)r   r   r   r   ROUND_ROBINPROPORTIONALr   r   r   r   r      s    r   c                       sf   e Zd ZU dZeejddidZeee	f e
d< eejddidZeee	f e
d<  fdd	Z  ZS )
$SentenceTransformerTrainingArgumentsa  
    SentenceTransformerTrainingArguments extends :class:`~transformers.TrainingArguments` with additional arguments
    specific to Sentence Transformers. See :class:`~transformers.TrainingArguments` for the complete list of
    available arguments.

    Args:
        output_dir (`str`):
            The output directory where the model checkpoints will be written.
        batch_sampler (Union[:class:`~sentence_transformers.training_args.BatchSamplers`, `str`], *optional*):
            The batch sampler to use. See :class:`~sentence_transformers.training_args.BatchSamplers` for valid options.
            Defaults to ``BatchSamplers.BATCH_SAMPLER``.
        multi_dataset_batch_sampler (Union[:class:`~sentence_transformers.training_args.MultiDatasetBatchSamplers`, `str`], *optional*):
            The multi-dataset batch sampler to use. See :class:`~sentence_transformers.training_args.MultiDatasetBatchSamplers`
            for valid options. Defaults to ``MultiDatasetBatchSamplers.PROPORTIONAL``.
    helpzThe batch sampler to use.)defaultmetadatar	   z'The multi-dataset batch sampler to use.multi_dataset_batch_samplerc                    s   t    t| j| _t| j| _d| _d| _| jt	j
kr+| jdkr)td d S d S | jt	jkrC| jsE| jdkr>td d| _d S d S d S )NTFunusedzCurrently using DataParallel (DP) for multi-gpu training, while DistributedDataParallel (DDP) is recommended for faster training. See https://sbert.net/docs/sentence_transformer/training/distributed.html for more information.zWhen using DistributedDataParallel (DDP), it is recommended to set `dataloader_drop_last=True` to avoid hanging issues with an uneven last batch. Setting `dataloader_drop_last=True`.)super__post_init__r   r	   r   r   prediction_loss_onlyddp_broadcast_buffersparallel_moder   NOT_DISTRIBUTED
output_dirloggerwarningDISTRIBUTEDdataloader_drop_last)self	__class__r   r   r!   H   s$   



z2SentenceTransformerTrainingArguments.__post_init__)r   r   r   r   r   r   r   r	   r   str__annotations__r   r   r   r!   __classcell__r   r   r,   r   r   /   s   
 

r   )loggingdataclassesr   r   typingr   transformersr   TransformersTrainingArgumentstransformers.training_argsr   transformers.utilsr   	getLoggerr   r'   r   r   r   r   r   r   r   <module>   s    

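
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module): shows how
# the samplers defined above are typically selected when building training
# arguments. The output directory, epoch count, and batch size below are
# assumed values chosen purely for demonstration.
if __name__ == "__main__":
    args = SentenceTransformerTrainingArguments(
        output_dir="output/example-model",  # hypothetical path
        num_train_epochs=1,
        per_device_train_batch_size=32,
        # Either the enum member or its string value (e.g. "no_duplicates") is
        # accepted; __post_init__ normalizes strings back into the enums.
        batch_sampler=BatchSamplers.NO_DUPLICATES,
        multi_dataset_batch_sampler=MultiDatasetBatchSamplers.ROUND_ROBIN,
    )
    print(args.batch_sampler, args.multi_dataset_batch_sampler)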