import inspect
from typing import Union

import numpy as np

from ..tokenization_utils import TruncationStrategy
from ..utils import add_end_docstrings, logging
from .base import ArgumentHandler, ChunkPipeline, build_pipeline_init_args


logger = logging.get_logger(__name__)


class ZeroShotClassificationArgumentHandler(ArgumentHandler):
    """
    Handles arguments for zero-shot text classification by turning each possible label into an NLI
    premise/hypothesis pair.
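
    For example (the sequence and labels here are illustrative), the sequence `"Who are you voting for in 2020?"`
    with the labels `["politics", "economics"]` and the default template `"This example is {}."` is expanded into
    the premise/hypothesis pairs:

    ```python
    [
        ["Who are you voting for in 2020?", "This example is politics."],
        ["Who are you voting for in 2020?", "This example is economics."],
    ]
    ```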
    """

    def _parse_labels(self, labels):
        if isinstance(labels, str):
            labels = [label.strip() for label in labels.split(",") if label.strip()]
        return labels

    def __call__(self, sequences, labels, hypothesis_template):
        if len(labels) == 0 or len(sequences) == 0:
            raise ValueError("You must include at least one label and at least one sequence.")
        if hypothesis_template.format(labels[0]) == hypothesis_template:
            raise ValueError(
                (
                    'The provided hypothesis_template "{}" was not able to be formatted with the target labels. '
                    "Make sure the passed template includes formatting syntax such as {{}} where the label should go."
                ).format(hypothesis_template)
            )

        if isinstance(sequences, str):
            sequences = [sequences]

        sequence_pairs = []
        for sequence in sequences:
            sequence_pairs.extend([[sequence, hypothesis_template.format(label)] for label in labels])

        return sequence_pairs, sequences


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class ZeroShotClassificationPipeline(ChunkPipeline):
    """
NLI-based zero-shot classification pipeline using a `ModelForSequenceClassification` trained on NLI (natural
language inference) tasks. Equivalent of `text-classification` pipelines, but these models don't require a
hardcoded number of potential classes; they can be chosen at runtime. This usually makes them slower, but
**much** more flexible.

Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
pair and passed to the pretrained model. Then, the logit for *entailment* is taken as the logit for the candidate
label being valid. Any NLI model can be used, but the id of the *entailment* label must be included in the model
config's `label2id` mapping (see [`~transformers.PretrainedConfig`]).

Example:

```python
>>> from transformers import pipeline

>>> oracle = pipeline(model="facebook/bart-large-mnli")
>>> oracle(
...     "I have a problem with my iphone that needs to be resolved asap!!",
...     candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
... )
{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]}

>>> oracle(
...     "I have a problem with my iphone that needs to be resolved asap!!",
...     candidate_labels=["english", "german"],
... )
{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['english', 'german'], 'scores': [0.814, 0.186]}
```
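
When several candidate labels can apply at once, pass `multi_label=True` so that each label is scored
independently against the sequence. The call below is an illustrative sketch: the scores are made-up, rounded
values and will differ depending on the checkpoint.

```python
>>> oracle(
...     "I have a problem with my iphone that needs to be resolved asap!!",
...     candidate_labels=["urgent", "phone", "tablet"],
...     multi_label=True,
... )
{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'tablet'], 'scores': [0.99, 0.98, 0.01]}
```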

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

This NLI pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"zero-shot-classification"`.

The models that this pipeline can use are models that have been fine-tuned on an NLI task. See the up-to-date list
of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
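
The candidate labels may also be given as a single comma-separated string, optionally combined with a custom
`hypothesis_template` (both arguments are documented on the pipeline's `__call__` method below). Another
illustrative sketch, again with made-up scores:

```python
>>> oracle(
...     "The new update drains my battery in two hours.",
...     candidate_labels="battery, display, software",
...     hypothesis_template="This complaint is about {}.",
... )
{'sequence': 'The new update drains my battery in two hours.', 'labels': ['battery', 'software', 'display'], 'scores': [0.9, 0.08, 0.02]}
```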
    """

    _load_processor = False
    _load_image_processor = False
    _load_feature_extractor = False
    _load_tokenizer = True

    def __init__(self, args_parser=ZeroShotClassificationArgumentHandler(), *args, **kwargs):
        self._args_parser = args_parser
        super().__init__(*args, **kwargs)
        if self.entailment_id == -1:
            logger.warning(
                "Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to "
                "-1. Define a descriptive label2id mapping in the model config to ensure correct outputs."
            )

    @property
    def entailment_id(self):
        # Look up the id of the "entailment" class in the model config; -1 signals that it could not be found.
        for label, ind in self.model.config.label2id.items():
            if label.lower().startswith("entail"):
                return ind
        return -1

    def _parse_and_tokenize(
        self, sequence_pairs, padding=True, add_special_tokens=True, truncation=TruncationStrategy.ONLY_FIRST, **kwargs
    ):
        """
        Parse arguments and tokenize with `only_first` truncation so that the hypothesis (label) is never truncated.
        """
        return_tensors = self.framework
        if self.tokenizer.pad_token is None:
            # Fallback for tokenizers that do not define a padding token.
            logger.error(
                "The tokenizer does not support padding, which is necessary for zero-shot classification. "
                "Attempting to use `pad_token=eos_token`."
            )
            self.tokenizer.pad_token = self.tokenizer.eos_token
        try:
            inputs = self.tokenizer(
                sequence_pairs,
                add_special_tokens=add_special_tokens,
                return_tensors=return_tensors,
                padding=padding,
                truncation=truncation,
            )
        except Exception as e:
            if "too short" in str(e):
                # Some tokenizers raise when asked to truncate to a length the input never reaches.
                # In that case, retry without truncation.
                inputs = self.tokenizer(
                    sequence_pairs,
                    add_special_tokens=add_special_tokens,
                    return_tensors=return_tensors,
                    padding=padding,
                    truncation=TruncationStrategy.DO_NOT_TRUNCATE,
                )
            else:
                raise e

        return inputs

    def _sanitize_parameters(self, **kwargs):
        if kwargs.get("multi_class", None) is not None:
            kwargs["multi_label"] = kwargs["multi_class"]
            logger.warning(
                "The `multi_class` argument has been deprecated and renamed to `multi_label`. "
                "`multi_class` will be removed in a future version of Transformers."
            )
        preprocess_params = {}
        if "candidate_labels" in kwargs:
            preprocess_params["candidate_labels"] = self._args_parser._parse_labels(kwargs["candidate_labels"])
        if "hypothesis_template" in kwargs:
            preprocess_params["hypothesis_template"] = kwargs["hypothesis_template"]

        postprocess_params = {}
        if "multi_label" in kwargs:
            postprocess_params["multi_label"] = kwargs["multi_label"]
        return preprocess_params, {}, postprocess_params

    def __call__(self, sequences: Union[str, list[str]], *args, **kwargs):
        """
Classify the sequence(s) given as inputs. See the [`ZeroShotClassificationPipeline`] documentation for more
information.

Args:
    sequences (`str` or `list[str]`):
        The sequence(s) to classify. They will be truncated if the model input is too large.
    candidate_labels (`str` or `list[str]`):
        The set of possible class labels to classify each sequence into. Can be a single label, a string of
        comma-separated labels, or a list of labels.
    hypothesis_template (`str`, *optional*, defaults to `"This example is {}."`):
        The template used to turn each label into an NLI-style hypothesis. This template must include a {} or
        similar syntax for the candidate label to be inserted into the template. For example, the default
        template is `"This example is {}."` With the candidate label `"sports"`, this would be fed into the
        model like `"<cls> sequence to classify <sep> This example is sports . <sep>"`. The default template
        works well in many cases, but it may be worthwhile to experiment with different templates depending on
        the task setting.
    multi_label (`bool`, *optional*, defaults to `False`):
        Whether or not multiple candidate labels can be true. If `False`, the scores are normalized such that
        the sum of the label likelihoods for each sequence is 1. If `True`, the labels are considered
        independent and probabilities are normalized for each candidate by doing a softmax of the entailment
        score vs. the contradiction score.

Return:
    A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:

    - **sequence** (`str`) -- The sequence for which this is the output.
    - **labels** (`list[str]`) -- The labels sorted by order of likelihood.
    - **scores** (`list[float]`) -- The probabilities for each of the labels.
r   r   r\   z%Unable to understand extra arguments )r   r   r5   r$   )r   r    r;   r<   r=   s       r   r$   'ZeroShotClassificationPipeline.__call__   s^    H t9>Y!^ 2& @)-aF%&DTFKLLw	4V44r-   c              #      #    U R                  XU5      u  pE[        [        X$5      5       H6  u  nu  pxU R                  U/5      n	UUS   U[	        U5      S-
  :H  S.U	Ev   M8     g 7f)Nr   r   candidate_labelr#   is_last)r4   	enumerateziprW   r   )
r   rU   r\   r!   r"   r    irf   sequence_pairmodel_inputs
             r   
preprocess)ZeroShotClassificationPipeline.preprocess   s~     $($5$5fPc$d!3<SAQ=b3c/A/22M?CK $3%aL$4 5 99 	  4ds   A$A&c                 v   US   nUS   nU R                   R                   Vs0 sH  oDX   _M	     nnU R                  S:X  a  U R                  R                  OU R                  R
                  nS[        R                  " U5      R                  ;   a  SUS'   U R                  " S0 UD6nUUUS   S.UEnU$ s  snf )	Nrf   r#   pt	use_cacheFrg   re   r&   )	rO   model_input_namesrN   rA   forwardcallinspect	signature
parameters)	r   rU   rf   r#   kmodel_inputsmodel_forwardoutputsmodel_outputss	            r   _forward'ZeroShotClassificationPipeline._forward   s     !23*%.2nn.N.NO.N69.NO.2nn.D

**$**//'++M:EEE(-L%**,|,  / i(
 	
  Ps   B6c                 "   U Vs/ sH  o3S   PM	     nnU Vs/ sH  o3S   PM	     nnU R                   S:X  aE  [        R                  " U Vs/ sH#  ofS   R                  5       R	                  5       PM%     sn5      nO6[        R                  " U Vs/ sH  ofS   R	                  5       PM     sn5      nUR
                  S   n[        U5      n	X-  n
UR                  XS45      nU(       d  [        U5      S:X  a_  U R                  nUS:X  a  SOSnUSX/4   n[        R                  " U5      [        R                  " U5      R                  SS	S
9-  nUS   nOLUSU R                  4   n[        R                  " U5      [        R                  " U5      R                  SS	S
9-  n[        [        US   R                  5       5      5      nUS   U Vs/ sH  nUU   PM
     snUSU4   R                  5       S.$ s  snf s  snf s  snf s  snf s  snf )Nrf   r#   rp   logitsr   r3   r   .T)keepdims).r   )r#   r   scores)rN   npconcatenatefloatnumpyshaper   reshaper7   expsumlistreversedargsorttolist)r   r|   r[   r{   r\   r    outputr   Nnnum_sequencesreshaped_outputsr7   contradiction_identail_contr_logitsr   entail_logitstop_indsrj   s                      r   postprocess*ZeroShotClassificationPipeline.postprocess   s   FSTm7$56mT8EFWZ(	F>>T!^^Ta$bTa&H%5%;%;%=%C%C%ETa$bcF^^M$ZM&H%5%;%;%=M$Z[FLLO !!>>=R*@A#./14 ..M%2a%7rQ"239I8Y3Y"ZVV/0266:M3N3R3RSU`d3R3eeFF^F -S$2D2D-DEMVVM*RVVM-B-F-FrTX-F-YYF!2!2!456!!4<=Hq'*H=Q[)002
 	
1 UF$b$Z* >s   G8G=)HHH)r4   )NzThis example is {}.)F)r'   r(   r)   r*   r+   _load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r6   propertyr7   r   
ONLY_FIRSTrW   r`   r   r   r   r$   rm   r}   r   r,   __classcell__)r=   s   @r   r0   r0   ,   s    %N O!#O#H#J    '+tPbPmPm(T9$+5d3i(+5Z$
 
r-   r0   )ru   typingr   r   r   tokenization_utilsr   utilsr   r   baser	   r
   r   
get_loggerr'   r8   r   r0   r&   r-   r   <module>r      sb       3 / J J 
		H	%)O )< ,4@Ab
] b
 Bb
r-   
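

# Illustrative sketch (not part of the pipeline API): the block below reproduces the score computation performed in
# `postprocess` on hand-written NLI logits, so the two normalization modes can be compared without loading a model.
# The logits, the [contradiction, neutral, entailment] layout and the per-label comments are assumptions made for
# this demonstration only.
if __name__ == "__main__":
    # Fake logits for one sequence and three candidate labels.
    fake_logits = np.array(
        [
            [0.1, 0.2, 3.0],  # label 0: strongly entailed
            [2.5, 0.3, 0.4],  # label 1: mostly contradicted
            [0.5, 0.5, 1.0],  # label 2: weakly entailed
        ]
    )
    entailment_id, contradiction_id = 2, 0

    # multi_label=False: softmax of the entailment logits across all candidate labels (scores sum to 1).
    entail_logits = fake_logits[:, entailment_id]
    single_label_scores = np.exp(entail_logits) / np.exp(entail_logits).sum()

    # multi_label=True: per-label softmax of entailment vs. contradiction (each score is independent).
    pair_logits = fake_logits[:, [contradiction_id, entailment_id]]
    multi_label_scores = (np.exp(pair_logits) / np.exp(pair_logits).sum(-1, keepdims=True))[:, 1]

    print("multi_label=False scores:", single_label_scores.round(3))
    print("multi_label=True  scores:", multi_label_scores.round(3))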