import types
import warnings
from typing import Any, Optional, Union, overload

import numpy as np

from ..models.bert.tokenization_bert import BasicTokenizer
from ..utils import ExplicitEnum, add_end_docstrings, is_tf_available, is_torch_available
from .base import ArgumentHandler, ChunkPipeline, Dataset, build_pipeline_init_args


if is_tf_available():
    import tensorflow as tf

    from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES

if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES


class TokenClassificationArgumentHandler(ArgumentHandler):
    """
    Handles arguments for token classification.
    """

    def __call__(self, inputs: Union[str, list[str]], **kwargs):
        is_split_into_words = kwargs.get("is_split_into_words", False)
        delimiter = kwargs.get("delimiter")

        if inputs is not None and isinstance(inputs, (list, tuple)) and len(inputs) > 0:
            inputs = list(inputs)
            batch_size = len(inputs)
        elif isinstance(inputs, str):
            inputs = [inputs]
            batch_size = 1
        elif Dataset is not None and isinstance(inputs, Dataset) or isinstance(inputs, types.GeneratorType):
            return inputs, None, is_split_into_words, delimiter
        else:
            raise ValueError("At least one input is required.")

        offset_mapping = kwargs.get("offset_mapping")
        if offset_mapping:
            if isinstance(offset_mapping, list) and isinstance(offset_mapping[0], tuple):
                offset_mapping = [offset_mapping]
            if len(offset_mapping) != batch_size:
                raise ValueError("offset_mapping should have the same batch size as the input")
        return inputs, offset_mapping, is_split_into_words, delimiter


class AggregationStrategy(ExplicitEnum):
    """All the valid aggregation strategies for TokenClassificationPipeline"""

    NONE = "none"
    SIMPLE = "simple"
    FIRST = "first"
    AVERAGE = "average"
    MAX = "max"


@add_end_docstrings(
    build_pipeline_init_args(has_tokenizer=True),
    r"""
        ignore_labels (`list[str]`, defaults to `["O"]`):
            A list of labels to ignore.
        grouped_entities (`bool`, *optional*, defaults to `False`):
            DEPRECATED, use `aggregation_strategy` instead. Whether or not to group the tokens corresponding to the
            same entity together in the predictions or not.
        stride (`int`, *optional*):
            If stride is provided, the pipeline is applied on all the text. The text is split into chunks of size
            model_max_length. Works only with fast tokenizers and `aggregation_strategy` different from `NONE`. The
            value of this argument defines the number of overlapping tokens between chunks. In other words, the model
            will shift forward by `tokenizer.model_max_length - stride` tokens each step.
        aggregation_strategy (`str`, *optional*, defaults to `"none"`):
            The strategy to fuse (or not) tokens based on the model prediction.

                - "none" : Will simply not do any aggregation and simply return raw results from the model
                - "simple" : Will attempt to group entities following the default schema. (A, B-TAG), (B, I-TAG), (C,
                  I-TAG), (D, B-TAG2) (E, B-TAG2) will end up being [{"word": ABC, "entity": "TAG"}, {"word": "D",
                  "entity": "TAG2"}, {"word": "E", "entity": "TAG2"}] Notice that two consecutive B tags will end up as
                  different entities. On word based languages, we might end up splitting words undesirably : Imagine
                  Microsoft being tagged as [{"word": "Micro", "entity": "ENTERPRISE"}, {"word": "soft", "entity":
                  "NAME"}]. Look for FIRST, MAX, AVERAGE for ways to mitigate that and disambiguate words (on languages
                  that support that meaning, which is basically tokens separated by a space). These mitigations will
                  only work on real words, "New york" might still be tagged with two different entities.
                - "first" : (works only on word based models) Will use the `SIMPLE` strategy except that words, cannot
                  end up with different tags. Words will simply use the tag of the first token of the word when there
                  is ambiguity.
                - "average" : (works only on word based models) Will use the `SIMPLE` strategy except that words,
                  cannot end up with different tags. scores will be averaged first across tokens, and then the maximum
                  label is applied.
                - "max" : (works only on word based models) Will use the `SIMPLE` strategy except that words, cannot
                  end up with different tags. Word entity will simply be the token with the maximum score.c                   ,  ^  \ rS rSrSrSrSrSrSrSr	\
" 5       4U 4S jjr        S+S\\   S	\\   S
\\   S\\\\\4         S\\   S\\   S\\   4S jjr\S\S\S\\\\4      4S j5       r\S\\   S\S\\\\\4         4S j5       rS\\\\   4   S\S\\\\\4      \\\\\4         4   4U 4S jjrS,S jrS r\R6                  S4S jrS r  S-S\S\R>                  S\R>                  S\\\\\4         S\R>                  S
\S\\\\         S\\\\\4         S\\   4S  jjr S!\\   S
\S\\   4S" jr!S#\\   S
\S\4S$ jr"S#\\   S
\S\\   4S% jr#S#\\   S\4S& jr$S'\S\\\4   4S( jr%S#\\   S\\   4S) jr&S*r'U =r($ ).TokenClassificationPipelineB   u	  
Named Entity Recognition pipeline using any `ModelForTokenClassification`. See the [named entity recognition
examples](../task_summary#named-entity-recognition) for more information.

Example:

```python
>>> from transformers import pipeline

>>> token_classifier = pipeline(model="Jean-Baptiste/camembert-ner", aggregation_strategy="simple")
>>> sentence = "Je m'appelle jean-baptiste et je vis à montréal"
>>> tokens = token_classifier(sentence)
>>> tokens
[{'entity_group': 'PER', 'score': 0.9931, 'word': 'jean-baptiste', 'start': 12, 'end': 26}, {'entity_group': 'LOC', 'score': 0.998, 'word': 'montréal', 'start': 38, 'end': 47}]

>>> token = tokens[0]
>>> # Start and end provide an easy way to highlight words in the original text.
>>> sentence[token["start"] : token["end"]]
' jean-baptiste'

>>> # Some models use the same idea to do part of speech.
>>> syntaxer = pipeline(model="vblagoje/bert-english-uncased-finetuned-pos", aggregation_strategy="simple")
>>> syntaxer("My name is Sarah and I live in London")
[{'entity_group': 'PRON', 'score': 0.999, 'word': 'my', 'start': 0, 'end': 2}, {'entity_group': 'NOUN', 'score': 0.997, 'word': 'name', 'start': 3, 'end': 7}, {'entity_group': 'AUX', 'score': 0.994, 'word': 'is', 'start': 8, 'end': 10}, {'entity_group': 'PROPN', 'score': 0.999, 'word': 'sarah', 'start': 11, 'end': 16}, {'entity_group': 'CCONJ', 'score': 0.999, 'word': 'and', 'start': 17, 'end': 20}, {'entity_group': 'PRON', 'score': 0.999, 'word': 'i', 'start': 21, 'end': 22}, {'entity_group': 'VERB', 'score': 0.998, 'word': 'live', 'start': 23, 'end': 27}, {'entity_group': 'ADP', 'score': 0.999, 'word': 'in', 'start': 28, 'end': 30}, {'entity_group': 'PROPN', 'score': 0.999, 'word': 'london', 'start': 31, 'end': 37}]
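
>>> # Illustrative sketch (an assumption, not a recorded run): long inputs can be processed in
>>> # overlapping chunks by passing `stride`, which requires a fast tokenizer and an
>>> # `aggregation_strategy` other than "none"; 128 is an arbitrary value below `model_max_length`.
>>> long_ner = pipeline(model="Jean-Baptiste/camembert-ner", aggregation_strategy="simple", stride=128)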
```

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

This token recognition pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"ner"` (for predicting the classes of tokens in a sequence: person, organisation, location or miscellaneous).

The models that this pipeline can use are models that have been fine-tuned on a token classification task. See the
up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=token-classification).
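
Example (illustrative, not a recorded run) of keeping raw token-level output: with `aggregation_strategy="none"`,
each returned dict describes a single token and carries `entity` and `index` keys instead of an `entity_group`:

```python
>>> raw_classifier = pipeline(model="Jean-Baptiste/camembert-ner", aggregation_strategy="none")
>>> raw_results = raw_classifier("Je m'appelle jean-baptiste et je vis à montréal")
```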
    """

    default_input_names = "sequences"

    _load_processor = False
    _load_image_processor = False
    _load_feature_extractor = False
    _load_tokenizer = True

    def __init__(self, args_parser=TokenClassificationArgumentHandler(), *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.check_model_type(
            TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
            if self.framework == "tf"
            else MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
        )

        self._basic_tokenizer = BasicTokenizer(do_lower_case=False)
        self._args_parser = args_parser

    def _sanitize_parameters(
        self,
        ignore_labels=None,
        grouped_entities: Optional[bool] = None,
        ignore_subwords: Optional[bool] = None,
        aggregation_strategy: Optional[AggregationStrategy] = None,
        offset_mapping: Optional[list[tuple[int, int]]] = None,
        stride: Optional[int] = None,
        is_split_into_words: Optional[bool] = False,
        delimiter: Optional[str] = None,
    ):
        preprocess_params = {}
        preprocess_params["is_split_into_words"] = is_split_into_words
        if is_split_into_words:
            preprocess_params["delimiter"] = " " if delimiter is None else delimiter
        if offset_mapping is not None:
            preprocess_params["offset_mapping"] = offset_mapping

        postprocess_params = {}
        if grouped_entities is not None or ignore_subwords is not None:
            if grouped_entities and ignore_subwords:
                aggregation_strategy = AggregationStrategy.FIRST
            elif grouped_entities and not ignore_subwords:
                aggregation_strategy = AggregationStrategy.SIMPLE
            else:
                aggregation_strategy = AggregationStrategy.NONE

            if grouped_entities is not None:
                warnings.warn(
                    "`grouped_entities` is deprecated and will be removed in version v5.0.0, defaulted to"
                    f' `aggregation_strategy="{aggregation_strategy}"` instead.'
                )
            if ignore_subwords is not None:
                warnings.warn(
                    "`ignore_subwords` is deprecated and will be removed in version v5.0.0, defaulted to"
                    f' `aggregation_strategy="{aggregation_strategy}"` instead.'
                )

        if aggregation_strategy is not None:
            if isinstance(aggregation_strategy, str):
                aggregation_strategy = AggregationStrategy[aggregation_strategy.upper()]
            if (
                aggregation_strategy
                in {AggregationStrategy.FIRST, AggregationStrategy.MAX, AggregationStrategy.AVERAGE}
                and not self.tokenizer.is_fast
            ):
                raise ValueError(
                    "Slow tokenizers cannot handle subwords. Please set the `aggregation_strategy` option"
                    ' to `"simple"` or use a fast tokenizer.'
                )
            postprocess_params["aggregation_strategy"] = aggregation_strategy
        if ignore_labels is not None:
            postprocess_params["ignore_labels"] = ignore_labels
        if stride is not None:
            if stride >= self.tokenizer.model_max_length:
                raise ValueError(
                    "`stride` must be less than `tokenizer.model_max_length` (or even lower if the tokenizer adds"
                    " special tokens)"
                )
            if aggregation_strategy == AggregationStrategy.NONE:
                raise ValueError(
                    "`stride` was provided to process all the text but `aggregation_strategy="
                    f'"{aggregation_strategy}"`, please select another one instead.'
                )
            if self.tokenizer.is_fast:
                tokenizer_params = {
                    "return_overflowing_tokens": True,
                    "padding": True,
                    "stride": stride,
                }
                preprocess_params["tokenizer_params"] = tokenizer_params
            else:
                raise ValueError(
                    "`stride` was provided to process all the text but you're using a slow tokenizer."
                    " Please use a fast tokenizer."
                )
        return preprocess_params, {}, postprocess_params

    @overload
    def __call__(self, inputs: str, **kwargs: Any) -> list[dict[str, Any]]: ...

    @overload
    def __call__(self, inputs: list[str], **kwargs: Any) -> list[list[dict[str, Any]]]: ...

    def __call__(self, inputs: Union[str, list[str]], **kwargs: Any):
        """
Classify each token of the text(s) given as inputs.

Args:
    inputs (`str` or `List[str]`):
        One or several texts (or one list of texts) for token classification. Can be pre-tokenized when
        `is_split_into_words=True`.

Return:
    A list or a list of lists of `dict`: Each result comes as a list of dictionaries (one for each token in the
    corresponding input, or each entity if this pipeline was instantiated with an aggregation_strategy) with
    the following keys:

    - **word** (`str`) -- The token/word classified. This is obtained by decoding the selected tokens. If you
      want to have the exact string in the original sentence, use `start` and `end`.
    - **score** (`float`) -- The corresponding probability for `entity`.
    - **entity** (`str`) -- The entity predicted for that token/word (it is named *entity_group* when
      *aggregation_strategy* is not `"none"`).
    - **index** (`int`, only present when `aggregation_strategy="none"`) -- The index of the corresponding
      token in the sentence.
    - **start** (`int`, *optional*) -- The index of the start of the corresponding entity in the sentence. Only
      exists if the offsets are available within the tokenizer
    - **end** (`int`, *optional*) -- The index of the end of the corresponding entity in the sentence. Only
      exists if the offsets are available within the tokenizer
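
Example of pre-tokenized input (an illustrative sketch; the checkpoint name and exact call are assumptions
based on the argument handler above -- the outer list marks a batch of one pre-tokenized sentence):

```python
>>> token_classifier = pipeline("ner", model="dslim/bert-base-NER")
>>> token_classifier([["Hugging", "Face", "is", "based", "in", "New", "York"]], is_split_into_words=True)
```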
        """

        _inputs, offset_mapping, is_split_into_words, delimiter = self._args_parser(inputs, **kwargs)
        kwargs["is_split_into_words"] = is_split_into_words
        kwargs["delimiter"] = delimiter

        if is_split_into_words and not all(isinstance(input, list) for input in inputs):
            # A single pre-tokenized sentence was passed as a flat list of words: wrap it so it is
            # treated as a batch of one.
            return super().__call__([inputs], **kwargs)

        if offset_mapping:
            kwargs["offset_mapping"] = offset_mapping

        return super().__call__(inputs, **kwargs)
   +     #    UR                  S0 5      nU R                  R                  =(       a    U R                  R                  S:  nS nUS   nU(       a  US   n[        U[        5      (       d  [        S5      eUn	UR                  U	5      n/ n[        U5      n
SnU	 H2  nUR                  X[        U5      -   45        U[        U5      U
-   -  nM4     U	nSUS'   O"[        U[        5      (       d  [        S5      eUnU R                  " U4U R                  USU R                  R                  S.UD6nU(       a&  U R                  R                  (       d  [        S	5      eUR                  S
S 5        [        US   5      n[        U5       H  nU R                  S:X  a>  UR                  5        VVs0 sH!  u  nnU[        R                  " UU   S5      _M#     nnnO7UR                  5        VVs0 sH  u  nnUUU   R!                  S5      _M     nnnUb  UUS'   US:X  a  UOS US'   UUS-
  :H  US'   Ub  UR#                  U5      US'   UUS'   Uv   M     g s  snnf s  snnf 7f)Nr[   r   r   r   zEWhen `is_split_into_words=True`, `sentence` must be a list of tokens.TzKWhen `is_split_into_words=False`, `sentence` must be an untokenized string.)return_tensors
truncationreturn_special_tokens_maskreturn_offsets_mappingz@is_split_into_words=True is only supported with fast tokenizers.overflow_to_sample_mapping	input_idsrF   r   sentencer   is_lastword_idsword_to_chars_map)popr_   ra   r   r   r$   joinr    appendr!   rK   r`   rangeitemsrF   expand_dims	unsqueezer}   )r%   r{   r   rb   r[   rv   r~   r   r   wordsdelimiter_lenchar_offsetwordtext_to_tokenizer   
num_chunksikvmodel_inputss                       r(   
preprocess&TokenClassificationPipeline.preprocess  s`    ,001CRH^^44\9X9X[\9\
 /0EF)+6Ih-- !hiiE ~~e,H "	NMK!((+SY7N)OPs4y=88 
  %6:23h,, !noo'
>>!'+#'>>#9#9
 
 t~~'='=_``

/6,-
z"A~~%GM||~V~tq!2>>!A$#: :~VAGPA1Q4>>!#4 4P)1?-.346xtL$&':>&9L# ,+1??1+=Z(4E01 #VPs   F4I)6'II)4!I#AI)c                    UR                  S5      nUR                  SS 5      nUR                  S5      nUR                  S5      nUR                  SS 5      nUR                  SS 5      nU R                  S:X  a  U R                  " S0 UD6S   nO1U R                  " S0 UD6n	[        U	[        5      (       a  U	S	   OU	S   nUUUUUUUS
.UE$ )Nspecial_tokens_maskr   r{   r|   r}   r~   rF   r   logits)r   r   r   r{   r|   r}   r~   r,   )r   rK   modelr   dict)
r%   r   r   r   r{   r|   r}   r~   r   outputs
             r(   _forward$TokenClassificationPipeline._forwardT  s    *../DE%))*:DA##J/""9-##J5(,,-@$G>>T!ZZ/,/2FZZ/,/F)3FD)A)AVH%vayF #6,  !2	
 	
 		
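
    # Shape sketch (illustrative, single chunk, PyTorch backend assumed): the dict returned by
    # `_forward` and consumed by `postprocess` contains roughly
    #   logits:              (1, sequence_length, num_labels)
    #   input_ids:           (1, sequence_length)
    #   special_tokens_mask: (1, sequence_length)
    #   offset_mapping:      (1, sequence_length, 2) for fast tokenizers, otherwise None
    # plus the bookkeeping entries `sentence`, `is_last`, `word_ids` and `word_to_chars_map`.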
    def postprocess(self, all_outputs, aggregation_strategy=AggregationStrategy.NONE, ignore_labels=None):
        if ignore_labels is None:
            ignore_labels = ["O"]
        all_entities = []
        word_to_chars_map = all_outputs[0].get("word_to_chars_map")
        for model_outputs in all_outputs:
            if self.framework == "pt" and model_outputs["logits"][0].dtype in (torch.bfloat16, torch.float16):
                logits = model_outputs["logits"][0].to(torch.float32).numpy()
            else:
                logits = model_outputs["logits"][0].numpy()
            sentence = all_outputs[0]["sentence"]
            input_ids = model_outputs["input_ids"][0]
            offset_mapping = (
                model_outputs["offset_mapping"][0] if model_outputs["offset_mapping"] is not None else None
            )
            special_tokens_mask = model_outputs["special_tokens_mask"][0].numpy()
            word_ids = model_outputs.get("word_ids")

            # Softmax over the label dimension, computed in a numerically stable way.
            maxes = np.max(logits, axis=-1, keepdims=True)
            shifted_exp = np.exp(logits - maxes)
            scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)

            if self.framework == "tf":
                input_ids = input_ids.numpy()
                offset_mapping = offset_mapping.numpy() if offset_mapping is not None else None

            pre_entities = self.gather_pre_entities(
                sentence,
                input_ids,
                scores,
                offset_mapping,
                special_tokens_mask,
                aggregation_strategy,
                word_ids=word_ids,
                word_to_chars_map=word_to_chars_map,
            )
            grouped_entities = self.aggregate(pre_entities, aggregation_strategy)
            # Filter anything that is in self.ignore_labels
            entities = [
                entity
                for entity in grouped_entities
                if entity.get("entity", None) not in ignore_labels
                and entity.get("entity_group", None) not in ignore_labels
            ]
            all_entities.extend(entities)
        num_chunks = len(all_outputs)
        if num_chunks > 1:
            all_entities = self.aggregate_overlapping_entities(all_entities)
        return all_entities

    def aggregate_overlapping_entities(self, entities):
        if len(entities) == 0:
            return entities
        entities = sorted(entities, key=lambda x: x["start"])
        aggregated_entities = []
        previous_entity = entities[0]
        for entity in entities:
            if previous_entity["start"] <= entity["start"] < previous_entity["end"]:
                current_length = entity["end"] - entity["start"]
                previous_length = previous_entity["end"] - previous_entity["start"]
                if current_length > previous_length:
                    previous_entity = entity
                elif current_length == previous_length and entity["score"] > previous_entity["score"]:
                    previous_entity = entity
            else:
                aggregated_entities.append(previous_entity)
                previous_entity = entity
        aggregated_entities.append(previous_entity)
        return aggregated_entities

    def gather_pre_entities(
        self,
        sentence: str,
        input_ids: np.ndarray,
        scores: np.ndarray,
        offset_mapping: Optional[list[tuple[int, int]]],
        special_tokens_mask: np.ndarray,
        aggregation_strategy: AggregationStrategy,
        word_ids: Optional[list[Optional[int]]] = None,
        word_to_chars_map: Optional[list[tuple[int, int]]] = None,
    ) -> list[dict]:
        """Fuse various numpy arrays into dicts with all the information needed for aggregation"""
        pre_entities = []
        for idx, token_scores in enumerate(scores):
            # Filter special_tokens
            if special_tokens_mask[idx]:
                continue

            word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx]))
            if offset_mapping is not None:
                start_ind, end_ind = offset_mapping[idx]
                # When the input was pre-tokenized, offsets are relative to each word: shift them by the
                # character offset of the word within the reconstructed sentence.
                if word_ids is not None and word_to_chars_map is not None:
                    word_index = word_ids[idx]
                    if word_index is not None:
                        start_char, _ = word_to_chars_map[word_index]
                        start_ind += start_char
                        end_ind += start_char
                if not isinstance(start_ind, int) and self.framework == "pt":
                    start_ind = start_ind.item()
                    end_ind = end_ind.item()
                word_ref = sentence[start_ind:end_ind]
                if getattr(self.tokenizer, "_tokenizer", None) and getattr(
                    self.tokenizer._tokenizer.model, "continuing_subword_prefix", None
                ):
                    # This is a BPE, word-aware tokenizer, there is a correct way to fuse tokens
                    is_subword = len(word) != len(word_ref)
                else:
                    # This is a fallback heuristic. It will most likely fail on any kind of text +
                    # punctuation mixtures that will be considered "words". Non-word-aware models cannot
                    # do better than this, unfortunately.
                    if aggregation_strategy in {
                        AggregationStrategy.FIRST,
                        AggregationStrategy.AVERAGE,
                        AggregationStrategy.MAX,
                    }:
                        warnings.warn(
                            "Tokenizer does not support real words, using fallback heuristic",
                            UserWarning,
                        )
                    is_subword = start_ind > 0 and " " not in sentence[start_ind - 1 : start_ind + 1]

                if int(input_ids[idx]) == self.tokenizer.unk_token_id:
                    word = word_ref
                    is_subword = False
            else:
                start_ind = None
                end_ind = None
                is_subword = False

            pre_entity = {
                "word": word,
                "scores": token_scores,
                "start": start_ind,
                "end": end_ind,
                "index": idx,
                "is_subword": is_subword,
            }
            pre_entities.append(pre_entity)
        return pre_entities

    def aggregate(self, pre_entities: list[dict], aggregation_strategy: AggregationStrategy) -> list[dict]:
        if aggregation_strategy in {AggregationStrategy.NONE, AggregationStrategy.SIMPLE}:
            entities = []
            for pre_entity in pre_entities:
                entity_idx = pre_entity["scores"].argmax()
                score = pre_entity["scores"][entity_idx]
                entity = {
                    "entity": self.model.config.id2label[entity_idx],
                    "score": score,
                    "index": pre_entity["index"],
                    "word": pre_entity["word"],
                    "start": pre_entity["start"],
                    "end": pre_entity["end"],
                }
                entities.append(entity)
        else:
            entities = self.aggregate_words(pre_entities, aggregation_strategy)

        if aggregation_strategy == AggregationStrategy.NONE:
            return entities
        return self.group_entities(entities)

    def aggregate_word(self, entities: list[dict], aggregation_strategy: AggregationStrategy) -> dict:
        word = self.tokenizer.convert_tokens_to_string([entity["word"] for entity in entities])
        if aggregation_strategy == AggregationStrategy.FIRST:
            scores = entities[0]["scores"]
            idx = scores.argmax()
            score = scores[idx]
            entity = self.model.config.id2label[idx]
        elif aggregation_strategy == AggregationStrategy.MAX:
            max_entity = max(entities, key=lambda entity: entity["scores"].max())
            scores = max_entity["scores"]
            idx = scores.argmax()
            score = scores[idx]
            entity = self.model.config.id2label[idx]
        elif aggregation_strategy == AggregationStrategy.AVERAGE:
            scores = np.stack([entity["scores"] for entity in entities])
            average_scores = np.nanmean(scores, axis=0)
            entity_idx = average_scores.argmax()
            entity = self.model.config.id2label[entity_idx]
            score = average_scores[entity_idx]
        else:
            raise ValueError("Invalid aggregation_strategy")
        new_entity = {
            "entity": entity,
            "score": score,
            "word": word,
            "start": entities[0]["start"],
            "end": entities[-1]["end"],
        }
        return new_entity

    def aggregate_words(self, entities: list[dict], aggregation_strategy: AggregationStrategy) -> list[dict]:
        """
        Override tokens from a given word that disagree to force agreement on word boundaries.

        Example: micro|soft| com|pany| B-ENT I-NAME I-ENT I-ENT will be rewritten with first strategy as microsoft|
        company| B-ENT I-ENT
        """
        if aggregation_strategy in {
            AggregationStrategy.NONE,
            AggregationStrategy.SIMPLE,
        }:
            raise ValueError("NONE and SIMPLE strategies are invalid for word aggregation")

        word_entities = []
        word_group = None
        for entity in entities:
            if word_group is None:
                word_group = [entity]
            elif entity["is_subword"]:
                word_group.append(entity)
            else:
                word_entities.append(self.aggregate_word(word_group, aggregation_strategy))
                word_group = [entity]
        # Last item
        if word_group is not None:
            word_entities.append(self.aggregate_word(word_group, aggregation_strategy))
        return word_entities

    def group_sub_entities(self, entities: list[dict]) -> dict:
        """
        Group together the adjacent tokens with the same entity predicted.

        Args:
            entities (`dict`): The entities predicted by the pipeline.
        """
        # Get the first entity in the entity group
        entity = entities[0]["entity"].split("-", 1)[-1]
        scores = np.nanmean([entity["score"] for entity in entities])
        tokens = [entity["word"] for entity in entities]

        entity_group = {
            "entity_group": entity,
            "score": np.mean(scores),
            "word": self.tokenizer.convert_tokens_to_string(tokens),
            "start": entities[0]["start"],
            "end": entities[-1]["end"],
        }
        return entity_group

    def get_tag(self, entity_name: str) -> tuple[str, str]:
        if entity_name.startswith("B-"):
            bi = "B"
            tag = entity_name[2:]
        elif entity_name.startswith("I-"):
            bi = "I"
            tag = entity_name[2:]
        else:
            # It's not in B-, I- format
            # Default to I- for continuation.
            bi = "I"
            tag = entity_name
        return bi, tag

    def group_entities(self, entities: list[dict]) -> list[dict]:
        """
        Find and group together the adjacent tokens with the same entity predicted.

        Args:
            entities (`dict`): The entities predicted by the pipeline.
        """

        entity_groups = []
        entity_group_disagg = []

        for entity in entities:
            if not entity_group_disagg:
                entity_group_disagg.append(entity)
                continue

            # If the current entity is similar and adjacent to the previous entity,
            # append it to the disaggregated entity group.
            # The split is meant to account for the "B" and "I" prefixes.
            # Shouldn't merge if both entities are B-type.
            bi, tag = self.get_tag(entity["entity"])
            last_bi, last_tag = self.get_tag(entity_group_disagg[-1]["entity"])

            if tag == last_tag and bi != "B":
                # Modify subword type to be previous_type
                entity_group_disagg.append(entity)
            else:
                # If the current entity is different from the previous entity
                # aggregate the disaggregated entity group
                entity_groups.append(self.group_sub_entities(entity_group_disagg))
                entity_group_disagg = [entity]
        if entity_group_disagg:
            # it's the last entity, add it to the entity groups
            entity_groups.append(self.group_sub_entities(entity_group_disagg))

        return entity_groups


NerPipeline = TokenClassificationPipeline
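

# Minimal end-to-end usage sketch (illustrative only; the checkpoint name is an assumption and the
# exact scores/spans depend on the model):
#
#   from transformers import pipeline
#
#   ner = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="first")
#   ner("Hugging Face is based in New York City")
#   # -> [{"entity_group": ..., "score": ..., "word": ..., "start": ..., "end": ...}, ...]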