
import enum
import warnings
from typing import Any, Union

from ..generation import GenerationConfig
from ..tokenization_utils import TruncationStrategy
from ..utils import add_end_docstrings, is_tf_available, is_torch_available, logging
from .base import Pipeline, build_pipeline_init_args


if is_tf_available():
    import tensorflow as tf

    from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES

if is_torch_available():
    from ..models.auto.modeling_auto import MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES

logger = logging.get_logger(__name__)


class ReturnType(enum.Enum):
    TENSORS = 0
    TEXT = 1


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class Text2TextGenerationPipeline(Pipeline):
    """
    Pipeline for text to text generation using seq2seq models.

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256
    - num_beams: 4
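
    These defaults can be overridden by passing `generate` arguments through the pipeline call; a minimal
    sketch (the parameter values below are illustrative only):

    ```python
    >>> from transformers import pipeline

    >>> generator = pipeline("text2text-generation")
    >>> generator("question: What is 42 ?", max_new_tokens=32, num_beams=1)  # greedy search, shorter output
    ```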

    Example:

    ```python
    >>> from transformers import pipeline

    >>> generator = pipeline(model="mrm8488/t5-base-finetuned-question-generation-ap")
    >>> generator(
    ...     "answer: Manuel context: Manuel has created RuPERTa-base with the support of HF-Transformers and Google"
    ... )
    [{'generated_text': 'question: Who created the RuPERTa-base?'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial). You can pass text
    generation parameters to this pipeline to control stopping criteria, decoding strategy, and more. Learn more about
    text generation parameters in [Text generation strategies](../generation_strategies) and [Text
    generation](text_generation).
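
    For instance, stopping criteria and decoding strategy can be set directly in the call; a sketch (the
    `stop_sequence` and sampling values here are illustrative assumptions):

    ```python
    >>> generator("question: What is 42 ?", do_sample=True, top_k=50, stop_sequence=".")
    ```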

    This Text2TextGenerationPipeline pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"text2text-generation"`.

    The models that this pipeline can use are models that have been fine-tuned on a text-to-text generation task. See
    the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=text2text-generation). For a list of available
    parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate).

    Usage:

    ```python
    text2text_generator = pipeline("text2text-generation")
    text2text_generator("question: What is 42 ? context: 42 is the answer to life, the universe and everything")
    ```
    """

    _pipeline_calls_generate = True
    _load_processor = False
    _load_image_processor = False
    _load_feature_extractor = False
    _load_tokenizer = True

    # Keep the class docstring in sync with these defaults.
    _default_generation_config = GenerationConfig(
        max_new_tokens=256,
        num_beams=4,
    )

    # Prefix used for the keys of the returned dicts, e.g. "generated_text".
    return_name = "generated"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.check_model_type(
            TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
            if self.framework == "tf"
            else MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
        )

    def _sanitize_parameters(
        self,
        return_tensors=None,
        return_text=None,
        return_type=None,
        clean_up_tokenization_spaces=None,
        truncation=None,
        stop_sequence=None,
        **generate_kwargs,
    ):
        preprocess_params = {}
        if truncation is not None:
            preprocess_params["truncation"] = truncation

        forward_params = generate_kwargs

        postprocess_params = {}
        if return_tensors is not None and return_type is None:
            return_type = ReturnType.TENSORS if return_tensors else ReturnType.TEXT
        if return_type is not None:
            postprocess_params["return_type"] = return_type
        if clean_up_tokenization_spaces is not None:
            postprocess_params["clean_up_tokenization_spaces"] = clean_up_tokenization_spaces

        if stop_sequence is not None:
            stop_sequence_ids = self.tokenizer.encode(stop_sequence, add_special_tokens=False)
            if len(stop_sequence_ids) > 1:
                warnings.warn(
                    "Stopping on a multiple token sequence is not yet supported on transformers. The first token of"
                    " the stop sequence will be used as the stop sequence string in the interim."
                )
            generate_kwargs["eos_token_id"] = stop_sequence_ids[0]

        if self.assistant_model is not None:
            forward_params["assistant_model"] = self.assistant_model
        if self.assistant_tokenizer is not None:
            forward_params["tokenizer"] = self.tokenizer
            forward_params["assistant_tokenizer"] = self.assistant_tokenizer

        return preprocess_params, forward_params, postprocess_params

    def check_inputs(self, input_length: int, min_length: int, max_length: int):
        """
        Checks whether there might be something wrong with the given input with regard to the model.
        """
        return True

    def _parse_and_tokenize(self, *args, truncation):
        prefix = self.prefix if self.prefix is not None else ""
        if isinstance(args[0], list):
            if self.tokenizer.pad_token_id is None:
                raise ValueError("Please make sure that the tokenizer has a pad_token_id when using a batch input")
            args = ([prefix + arg for arg in args[0]],)
            padding = True
        elif isinstance(args[0], str):
            args = (prefix + args[0],)
            padding = False
        else:
            raise TypeError(
                f"`args[0]`: {args[0]} has the wrong format. It should be either of type `str` or of type `list`."
            )
        inputs = self.tokenizer(*args, padding=padding, truncation=truncation, return_tensors=self.framework)
        # `token_type_ids` is produced by some tokenizers but is not a valid `generate` kwarg.
        if "token_type_ids" in inputs:
            del inputs["token_type_ids"]
        return inputs

    def __call__(self, *args: Any, **kwargs: Any) -> Union[list[dict[str, Any]], list[list[dict[str, Any]]]]:
        r"""
        Generate the output text(s) using text(s) given as inputs.

        Args:
            args (`str` or `list[str]`):
                Input text for the encoder.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            truncation (`TruncationStrategy`, *optional*, defaults to `TruncationStrategy.DO_NOT_TRUNCATE`):
                The truncation strategy for the tokenization within the pipeline. `TruncationStrategy.DO_NOT_TRUNCATE`
                (default) will never truncate, but it is sometimes desirable to truncate the input to fit the model's
                max_length instead of throwing an error down the line.
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **generated_text** (`str`, present when `return_text=True`) -- The generated text.
            - **generated_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
              ids of the generated text.
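
        Example (a minimal sketch of a batched call; when a list of texts is passed and each input yields a
        single generated sequence, the results come back as a flat list of dicts):

        ```python
        >>> outputs = generator(["question: Who wrote it?", "question: When was it written?"])
        >>> [out["generated_text"] for out in outputs]
        ```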
        """
        result = super().__call__(*args, **kwargs)
        if (
            isinstance(args[0], list)
            and all(isinstance(el, str) for el in args[0])
            and all(len(res) == 1 for res in result)
        ):
            return [res[0] for res in result]
        return result

    def preprocess(self, inputs, truncation=TruncationStrategy.DO_NOT_TRUNCATE, **kwargs):
        inputs = self._parse_and_tokenize(inputs, truncation=truncation, **kwargs)
        return inputs

    def _forward(self, model_inputs, **generate_kwargs):
        if self.framework == "pt":
            in_b, input_length = model_inputs["input_ids"].shape
        elif self.framework == "tf":
            in_b, input_length = tf.shape(model_inputs["input_ids"]).numpy()

        self.check_inputs(
            input_length,
            generate_kwargs.get("min_length", self.generation_config.min_length),
            generate_kwargs.get("max_length", self.generation_config.max_length),
        )

        # A user-supplied `generation_config` passed at call time takes precedence.
        if "generation_config" not in generate_kwargs:
            generate_kwargs["generation_config"] = self.generation_config

        output_ids = self.model.generate(**model_inputs, **generate_kwargs)
        out_b = output_ids.shape[0]
        # Regroup the generated sequences by input: one row per input text.
        if self.framework == "pt":
            output_ids = output_ids.reshape(in_b, out_b // in_b, *output_ids.shape[1:])
        elif self.framework == "tf":
            output_ids = tf.reshape(output_ids, (in_b, out_b // in_b, *output_ids.shape[1:]))
        return {"output_ids": output_ids}

    def postprocess(self, model_outputs, return_type=ReturnType.TEXT, clean_up_tokenization_spaces=False):
        records = []
        for output_ids in model_outputs["output_ids"][0]:
            if return_type == ReturnType.TENSORS:
                record = {f"{self.return_name}_token_ids": output_ids}
            elif return_type == ReturnType.TEXT:
                record = {
                    f"{self.return_name}_text": self.tokenizer.decode(
                        output_ids,
                        skip_special_tokens=True,
                        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
                    )
                }
            records.append(record)
        return records


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class SummarizationPipeline(Text2TextGenerationPipeline):
    """
    Summarize news articles and other documents.

    This summarizing pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"summarization"`.

    The models that this pipeline can use are models that have been fine-tuned on a summarization task, currently
    including '*bart-large-cnn*', '*google-t5/t5-small*', '*google-t5/t5-base*', '*google-t5/t5-large*',
    '*google-t5/t5-3b*' and '*google-t5/t5-11b*'. See the up-to-date
    list of available models on [huggingface.co/models](https://huggingface.co/models?filter=summarization). For a list
    of available parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate).

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256
    - num_beams: 4

    Usage:

    ```python
    # use bart in pytorch
    summarizer = pipeline("summarization")
    summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)

    # use t5 in tf
    summarizer = pipeline("summarization", model="google-t5/t5-base", tokenizer="google-t5/t5-base", framework="tf")
    summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)
    ```
    """

    # Prefix used for the keys of the returned dicts, e.g. "summary_text".
    return_name = "summary"

    def __call__(self, *args, **kwargs):
        r"""
        Summarize the text(s) given as inputs.

        Args:
            documents (*str* or `list[str]`):
                One or several articles (or one list of articles) to summarize.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **summary_text** (`str`, present when `return_text=True`) -- The summary of the corresponding input.
            - **summary_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
              ids of the summary.
        """
        return super().__call__(*args, **kwargs)

    def check_inputs(self, input_length: int, min_length: int, max_length: int) -> bool:
        """
        Checks whether there might be something wrong with the given input with regard to the model.
        """
        if max_length < min_length:
            logger.warning(f"Your min_length={min_length} must be smaller than your max_length={max_length}.")

        if input_length < max_length:
            logger.warning(
                f"Your max_length is set to {max_length}, but your input_length is only {input_length}. Since this is "
                "a summarization task, where outputs shorter than the input are typically wanted, you might consider "
                f"decreasing max_length manually, e.g. summarizer('...', max_length={input_length // 2})"
            )


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class TranslationPipeline(Text2TextGenerationPipeline):
    """
    Translates from one language to another.

    This translation pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"translation_xx_to_yy"`.

    The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
    up-to-date list of available models on [huggingface.co/models](https://huggingface.co/models?filter=translation).
    For a list of available parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate).
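
    Multilingual checkpoints typically also expect `src_lang` and `tgt_lang` to be set; a sketch (the NLLB
    checkpoint and language codes below are illustrative assumptions):

    ```python
    >>> translator = pipeline("translation", model="facebook/nllb-200-distilled-600M")
    >>> translator("How old are you?", src_lang="eng_Latn", tgt_lang="fra_Latn")
    ```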

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256
    - num_beams: 4

    Usage:

    ```python
    en_fr_translator = pipeline("translation_en_to_fr")
    en_fr_translator("How old are you?")
    ```
    """

    # Prefix used for the keys of the returned dicts, e.g. "translation_text".
    return_name = "translation"

    def check_inputs(self, input_length: int, min_length: int, max_length: int):
        if input_length > 0.9 * max_length:
            logger.warning(
                f"Your input_length: {input_length} is bigger than 0.9 * max_length: {max_length}. You might consider "
                "increasing your max_length manually, e.g. translator('...', max_length=400)"
            )
        return True

    def preprocess(self, *args, truncation=TruncationStrategy.DO_NOT_TRUNCATE, src_lang=None, tgt_lang=None):
        if getattr(self.tokenizer, "_build_translation_inputs", None):
            return self.tokenizer._build_translation_inputs(
                *args, return_tensors=self.framework, truncation=truncation, src_lang=src_lang, tgt_lang=tgt_lang
            )
        else:
            return super()._parse_and_tokenize(*args, truncation=truncation)

    def _sanitize_parameters(self, src_lang=None, tgt_lang=None, **kwargs):
        preprocess_params, forward_params, postprocess_params = super()._sanitize_parameters(**kwargs)
        if src_lang is not None:
            preprocess_params["src_lang"] = src_lang
        if tgt_lang is not None:
            preprocess_params["tgt_lang"] = tgt_lang
        if src_lang is None and tgt_lang is None:
            # Backward compatibility: the direction may be encoded in the task name, e.g. "translation_en_to_fr".
            task = kwargs.get("task", self.task)
            items = task.split("_")
            if task and len(items) == 4:
                # ["translation", "XX", "to", "YY"]
                preprocess_params["src_lang"] = items[1]
                preprocess_params["tgt_lang"] = items[3]
        return preprocess_params, forward_params, postprocess_params

    def __call__(self, *args, **kwargs):
        r"""
        Translate the text(s) given as inputs.

        Args:
            args (`str` or `list[str]`):
                Texts to be translated.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            src_lang (`str`, *optional*):
                The language of the input. Might be required for multilingual models. Will not have any effect for
                single pair translation models.
            tgt_lang (`str`, *optional*):
                The language of the desired output. Might be required for multilingual models. Will not have any effect
                for single pair translation models.
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **translation_text** (`str`, present when `return_text=True`) -- The translation.
            - **translation_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The
              token ids of the translation.
        """
        return super().__call__(*args, **kwargs)