import types
import warnings
from typing import Any, Optional, Union, overload

import numpy as np

from ..models.bert.tokenization_bert import BasicTokenizer
from ..utils import ExplicitEnum, add_end_docstrings, is_tf_available, is_torch_available
from .base import ArgumentHandler, ChunkPipeline, Dataset, build_pipeline_init_args


if is_tf_available():
    import tensorflow as tf

    from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES

if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES


class TokenClassificationArgumentHandler(ArgumentHandler):
    """
    Handles arguments for token classification.
    """

    def __call__(self, inputs: Union[str, list[str]], **kwargs):
        is_split_into_words = kwargs.get("is_split_into_words", False)
        delimiter = kwargs.get("delimiter")

        if inputs is not None and isinstance(inputs, (list, tuple)) and len(inputs) > 0:
            inputs = list(inputs)
            batch_size = len(inputs)
        elif isinstance(inputs, str):
            inputs = [inputs]
            batch_size = 1
        elif Dataset is not None and isinstance(inputs, Dataset) or isinstance(inputs, types.GeneratorType):
            return inputs, None, is_split_into_words, delimiter
        else:
            raise ValueError("At least one input is required.")

        offset_mapping = kwargs.get("offset_mapping")
        if offset_mapping:
            if isinstance(offset_mapping, list) and isinstance(offset_mapping[0], tuple):
                offset_mapping = [offset_mapping]
            if len(offset_mapping) != batch_size:
                raise ValueError("offset_mapping should have the same batch size as the input")
        return inputs, offset_mapping, is_split_into_words, delimiter


class AggregationStrategy(ExplicitEnum):
    """All the valid aggregation strategies for TokenClassificationPipeline"""

    NONE = "none"
    SIMPLE = "simple"
    FIRST = "first"
    AVERAGE = "average"
    MAX = "max"


@add_end_docstrings(
    build_pipeline_init_args(has_tokenizer=True),
    r"""
        ignore_labels (`list[str]`, defaults to `["O"]`):
            A list of labels to ignore.
        grouped_entities (`bool`, *optional*, defaults to `False`):
            DEPRECATED, use `aggregation_strategy` instead. Whether or not to group the tokens corresponding to the
            same entity together in the predictions or not.
        stride (`int`, *optional*):
            If stride is provided, the pipeline is applied on all the text. The text is split into chunks of size
            model_max_length. Works only with fast tokenizers and `aggregation_strategy` different from `NONE`. The
            value of this argument defines the number of overlapping tokens between chunks. In other words, the model
            will shift forward by `tokenizer.model_max_length - stride` tokens each step.
        aggregation_strategy (`str`, *optional*, defaults to `"none"`):
            The strategy to fuse (or not) tokens based on the model prediction.

                - "none" : Will simply not do any aggregation and simply return raw results from the model
                - "simple" : Will attempt to group entities following the default schema. (A, B-TAG), (B, I-TAG), (C,
                  I-TAG), (D, B-TAG2) (E, B-TAG2) will end up being [{"word": ABC, "entity": "TAG"}, {"word": "D",
                  "entity": "TAG2"}, {"word": "E", "entity": "TAG2"}] Notice that two consecutive B tags will end up as
                  different entities. On word based languages, we might end up splitting words undesirably : Imagine
                  Microsoft being tagged as [{"word": "Micro", "entity": "ENTERPRISE"}, {"word": "soft", "entity":
                  "NAME"}]. Look for FIRST, MAX, AVERAGE for ways to mitigate that and disambiguate words (on languages
                  that support that meaning, which is basically tokens separated by a space). These mitigations will
                  only work on real words, "New york" might still be tagged with two different entities.
                - "first" : (works only on word based models) Will use the `SIMPLE` strategy except that words, cannot
                  end up with different tags. Words will simply use the tag of the first token of the word when there
                  is ambiguity.
                - "average" : (works only on word based models) Will use the `SIMPLE` strategy except that words,
                  cannot end up with different tags. scores will be averaged first across tokens, and then the maximum
                  label is applied.
                - "max" : (works only on word based models) Will use the `SIMPLE` strategy except that words, cannot
                  end up with different tags. Word entity will simply be the token with the maximum score.c                   ,  ^  \ rS rSrSrSrSrSrSrSr	\
" 5       4U 4S jjr        S+S\\   S	\\   S
\\   S\\\\\4         S\\   S\\   S\\   4S jjr\S\S\S\\\\4      4S j5       r\S\\   S\S\\\\\4         4S j5       rS\\\\   4   S\S\\\\\4      \\\\\4         4   4U 4S jjrS,S jrS r\R6                  S4S jrS r  S-S\S\R>                  S\R>                  S\\\\\4         S\R>                  S
\S\\\\         S\\\\\4         S\\   4S  jjr S!\\   S
\S\\   4S" jr!S#\\   S
\S\4S$ jr"S#\\   S
\S\\   4S% jr#S#\\   S\4S& jr$S'\S\\\4   4S( jr%S#\\   S\\   4S) jr&S*r'U =r($ ).TokenClassificationPipelineB   u	  
Named Entity Recognition pipeline using any `ModelForTokenClassification`. See the [named entity recognition
examples](../task_summary#named-entity-recognition) for more information.

Example:

```python
>>> from transformers import pipeline

>>> token_classifier = pipeline(model="Jean-Baptiste/camembert-ner", aggregation_strategy="simple")
>>> sentence = "Je m'appelle jean-baptiste et je vis à montréal"
>>> tokens = token_classifier(sentence)
>>> tokens
[{'entity_group': 'PER', 'score': 0.9931, 'word': 'jean-baptiste', 'start': 12, 'end': 26}, {'entity_group': 'LOC', 'score': 0.998, 'word': 'montréal', 'start': 38, 'end': 47}]

>>> token = tokens[0]
>>> # Start and end provide an easy way to highlight words in the original text.
>>> sentence[token["start"] : token["end"]]
' jean-baptiste'

>>> # Some models use the same idea to do part of speech.
>>> syntaxer = pipeline(model="vblagoje/bert-english-uncased-finetuned-pos", aggregation_strategy="simple")
>>> syntaxer("My name is Sarah and I live in London")
[{'entity_group': 'PRON', 'score': 0.999, 'word': 'my', 'start': 0, 'end': 2}, {'entity_group': 'NOUN', 'score': 0.997, 'word': 'name', 'start': 3, 'end': 7}, {'entity_group': 'AUX', 'score': 0.994, 'word': 'is', 'start': 8, 'end': 10}, {'entity_group': 'PROPN', 'score': 0.999, 'word': 'sarah', 'start': 11, 'end': 16}, {'entity_group': 'CCONJ', 'score': 0.999, 'word': 'and', 'start': 17, 'end': 20}, {'entity_group': 'PRON', 'score': 0.999, 'word': 'i', 'start': 21, 'end': 22}, {'entity_group': 'VERB', 'score': 0.998, 'word': 'live', 'start': 23, 'end': 27}, {'entity_group': 'ADP', 'score': 0.999, 'word': 'in', 'start': 28, 'end': 30}, {'entity_group': 'PROPN', 'score': 0.999, 'word': 'london', 'start': 31, 'end': 37}]
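
>>> # Illustrative sketch (an assumption, not a recorded run): long inputs can be processed in
>>> # overlapping chunks by passing `stride`, which requires a fast tokenizer and an
>>> # `aggregation_strategy` other than "none"; 128 is an arbitrary value below `model_max_length`.
>>> long_ner = pipeline(model="Jean-Baptiste/camembert-ner", aggregation_strategy="simple", stride=128)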
```

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

This token recognition pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"ner"` (for predicting the classes of tokens in a sequence: person, organisation, location or miscellaneous).

The models that this pipeline can use are models that have been fine-tuned on a token classification task. See the
up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=token-classification).
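
Example (illustrative, not a recorded run) of keeping raw token-level output: with `aggregation_strategy="none"`,
each returned dict describes a single token and carries `entity` and `index` keys instead of an `entity_group`:

```python
>>> raw_classifier = pipeline(model="Jean-Baptiste/camembert-ner", aggregation_strategy="none")
>>> raw_results = raw_classifier("Je m'appelle jean-baptiste et je vis à montréal")
```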
    """

    default_input_names = "sequences"

    _load_processor = False
    _load_image_processor = False
    _load_feature_extractor = False
    _load_tokenizer = True

    def __init__(self, args_parser=TokenClassificationArgumentHandler(), *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.check_model_type(
            TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
            if self.framework == "tf"
            else MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
        )

        self._basic_tokenizer = BasicTokenizer(do_lower_case=False)
        self._args_parser = args_parser

    def _sanitize_parameters(
        self,
        ignore_labels=None,
        grouped_entities: Optional[bool] = None,
        ignore_subwords: Optional[bool] = None,
        aggregation_strategy: Optional[AggregationStrategy] = None,
        offset_mapping: Optional[list[tuple[int, int]]] = None,
        stride: Optional[int] = None,
        is_split_into_words: Optional[bool] = False,
        delimiter: Optional[str] = None,
    ):
        preprocess_params = {}
        preprocess_params["is_split_into_words"] = is_split_into_words
        if is_split_into_words:
            preprocess_params["delimiter"] = " " if delimiter is None else delimiter
        if offset_mapping is not None:
            preprocess_params["offset_mapping"] = offset_mapping

        postprocess_params = {}
        if grouped_entities is not None or ignore_subwords is not None:
            if grouped_entities and ignore_subwords:
                aggregation_strategy = AggregationStrategy.FIRST
            elif grouped_entities and not ignore_subwords:
                aggregation_strategy = AggregationStrategy.SIMPLE
            else:
                aggregation_strategy = AggregationStrategy.NONE

            if grouped_entities is not None:
                warnings.warn(
                    "`grouped_entities` is deprecated and will be removed in version v5.0.0, defaulted to"
                    f' `aggregation_strategy="{aggregation_strategy}"` instead.'
                )
            if ignore_subwords is not None:
                warnings.warn(
                    "`ignore_subwords` is deprecated and will be removed in version v5.0.0, defaulted to"
                    f' `aggregation_strategy="{aggregation_strategy}"` instead.'
                )

        if aggregation_strategy is not None:
            if isinstance(aggregation_strategy, str):
                aggregation_strategy = AggregationStrategy[aggregation_strategy.upper()]
            if (
                aggregation_strategy
                in {AggregationStrategy.FIRST, AggregationStrategy.MAX, AggregationStrategy.AVERAGE}
                and not self.tokenizer.is_fast
            ):
                raise ValueError(
                    "Slow tokenizers cannot handle subwords. Please set the `aggregation_strategy` option"
                    ' to `"simple"` or use a fast tokenizer.'
                )
            postprocess_params["aggregation_strategy"] = aggregation_strategy
        if ignore_labels is not None:
            postprocess_params["ignore_labels"] = ignore_labels
        if stride is not None:
            if stride >= self.tokenizer.model_max_length:
                raise ValueError(
                    "`stride` must be less than `tokenizer.model_max_length` (or even lower if the tokenizer adds"
                    " special tokens)"
                )
            if aggregation_strategy == AggregationStrategy.NONE:
                raise ValueError(
                    "`stride` was provided to process all the text but `aggregation_strategy="
                    f'"{aggregation_strategy}"`, please select another one instead.'
                )
            if self.tokenizer.is_fast:
                tokenizer_params = {
                    "return_overflowing_tokens": True,
                    "padding": True,
                    "stride": stride,
                }
                preprocess_params["tokenizer_params"] = tokenizer_params
            else:
                raise ValueError(
                    "`stride` was provided to process all the text but you're using a slow tokenizer."
                    " Please use a fast tokenizer."
                )
        return preprocess_params, {}, postprocess_params

    @overload
    def __call__(self, inputs: str, **kwargs: Any) -> list[dict[str, Any]]: ...

    @overload
    def __call__(self, inputs: list[str], **kwargs: Any) -> list[list[dict[str, Any]]]: ...

    def __call__(self, inputs: Union[str, list[str]], **kwargs: Any):
        """
Classify each token of the text(s) given as inputs.

Args:
    inputs (`str` or `List[str]`):
        One or several texts (or one list of texts) for token classification. Can be pre-tokenized when
        `is_split_into_words=True`.

Return:
    A list or a list of lists of `dict`: Each result comes as a list of dictionaries (one for each token in the
    corresponding input, or each entity if this pipeline was instantiated with an aggregation_strategy) with
    the following keys:

    - **word** (`str`) -- The token/word classified. This is obtained by decoding the selected tokens. If you
      want to have the exact string in the original sentence, use `start` and `end`.
    - **score** (`float`) -- The corresponding probability for `entity`.
    - **entity** (`str`) -- The entity predicted for that token/word (it is named *entity_group* when
      *aggregation_strategy* is not `"none"`).
    - **index** (`int`, only present when `aggregation_strategy="none"`) -- The index of the corresponding
      token in the sentence.
    - **start** (`int`, *optional*) -- The index of the start of the corresponding entity in the sentence. Only
      exists if the offsets are available within the tokenizer
    - **end** (`int`, *optional*) -- The index of the end of the corresponding entity in the sentence. Only
      exists if the offsets are available within the tokenizer
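
Example of pre-tokenized input (an illustrative sketch; the checkpoint name and exact call are assumptions
based on the argument handler above -- the outer list marks a batch of one pre-tokenized sentence):

```python
>>> token_classifier = pipeline("ner", model="dslim/bert-base-NER")
>>> token_classifier([["Hugging", "Face", "is", "based", "in", "New", "York"]], is_split_into_words=True)
```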
        """

        _inputs, offset_mapping, is_split_into_words, delimiter = self._args_parser(inputs, **kwargs)
        kwargs["is_split_into_words"] = is_split_into_words
        kwargs["delimiter"] = delimiter

        if is_split_into_words and not all(isinstance(input, list) for input in inputs):
            # A single pre-tokenized sentence was passed as a flat list of words: wrap it so it is
            # treated as a batch of one.
            return super().__call__([inputs], **kwargs)

        if offset_mapping:
            kwargs["offset_mapping"] = offset_mapping

        return super().__call__(inputs, **kwargs)
   +     #    UR                  S0 5      nU R                  R                  =(       a    U R                  R                  S:  nS nUS   nU(       a  US   n[        U[        5      (       d  [        S5      eUn	UR                  U	5      n/ n[        U5      n
SnU	 H2  nUR                  X[        U5      -   45        U[        U5      U
-   -  nM4     U	nSUS'   O"[        U[        5      (       d  [        S5      eUnU R                  " U4U R                  USU R                  R                  S.UD6nU(       a&  U R                  R                  (       d  [        S	5      eUR                  S
S 5        [        US   5      n[        U5       H  nU R                  S:X  a>  UR                  5        VVs0 sH!  u  nnU[        R                  " UU   S5      _M#     nnnO7UR                  5        VVs0 sH  u  nnUUU   R!                  S5      _M     nnnUb  UUS'   US:X  a  UOS US'   UUS-
  :H  US'   Ub  UR#                  U5      US'   UUS'   Uv   M     g s  snnf s  snnf 7f)Nr[   r   r   r   zEWhen `is_split_into_words=True`, `sentence` must be a list of tokens.TzKWhen `is_split_into_words=False`, `sentence` must be an untokenized string.)return_tensors
truncationreturn_special_tokens_maskreturn_offsets_mappingz@is_split_into_words=True is only supported with fast tokenizers.overflow_to_sample_mapping	input_idsrF   r   sentencer   is_lastword_idsword_to_chars_map)popr_   ra   r   r   r$   joinr    appendr!   rK   r`   rangeitemsrF   expand_dims	unsqueezer}   )r%   r{   r   rb   r[   rv   r~   r   r   wordsdelimiter_lenchar_offsetwordtext_to_tokenizer   
num_chunksikvmodel_inputss                       r(   
preprocess&TokenClassificationPipeline.preprocess  s`    ,001CRH^^44\9X9X[\9\
 /0EF)+6Ih-- !hiiE ~~e,H "	NMK!((+SY7N)OPs4y=88 
  %6:23h,, !noo'
>>!'+#'>>#9#9
 
 t~~'='=_``

/6,-
z"A~~%GM||~V~tq!2>>!A$#: :~VAGPA1Q4>>!#4 4P)1?-.346xtL$&':>&9L# ,+1??1+=Z(4E01 #VPs   F4I)6'II)4!I#AI)c                    UR                  S5      nUR                  SS 5      nUR                  S5      nUR                  S5      nUR                  SS 5      nUR                  SS 5      nU R                  S:X  a  U R                  " S0 UD6S   nO1U R                  " S0 UD6n	[        U	[        5      (       a  U	S	   OU	S   nUUUUUUUS
.UE$ )Nspecial_tokens_maskr   r{   r|   r}   r~   rF   r   logits)r   r   r   r{   r|   r}   r~   r,   )r   rK   modelr   dict)
r%   r   r   r   r{   r|   r}   r~   r   outputs
             r(   _forward$TokenClassificationPipeline._forwardT  s    *../DE%))*:DA##J/""9-##J5(,,-@$G>>T!ZZ/,/2FZZ/,/F)3FD)A)AVH%vayF #6,  !2	
 	
 		
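
    # Shape sketch (illustrative, single chunk, PyTorch backend assumed): the dict returned by
    # `_forward` and consumed by `postprocess` contains roughly
    #   logits:              (1, sequence_length, num_labels)
    #   input_ids:           (1, sequence_length)
    #   special_tokens_mask: (1, sequence_length)
    #   offset_mapping:      (1, sequence_length, 2) for fast tokenizers, otherwise None
    # plus the bookkeeping entries `sentence`, `is_last`, `word_ids` and `word_to_chars_map`.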
    def postprocess(self, all_outputs, aggregation_strategy=AggregationStrategy.NONE, ignore_labels=None):
        if ignore_labels is None:
            ignore_labels = ["O"]
        all_entities = []
        word_to_chars_map = all_outputs[0].get("word_to_chars_map")
        for model_outputs in all_outputs:
            if self.framework == "pt" and model_outputs["logits"][0].dtype in (torch.bfloat16, torch.float16):
                logits = model_outputs["logits"][0].to(torch.float32).numpy()
            else:
                logits = model_outputs["logits"][0].numpy()
            sentence = all_outputs[0]["sentence"]
            input_ids = model_outputs["input_ids"][0]
            offset_mapping = (
                model_outputs["offset_mapping"][0] if model_outputs["offset_mapping"] is not None else None
            )
            special_tokens_mask = model_outputs["special_tokens_mask"][0].numpy()
            word_ids = model_outputs.get("word_ids")

            # Softmax over the label dimension, computed in a numerically stable way.
            maxes = np.max(logits, axis=-1, keepdims=True)
            shifted_exp = np.exp(logits - maxes)
            scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)

            if self.framework == "tf":
                input_ids = input_ids.numpy()
                offset_mapping = offset_mapping.numpy() if offset_mapping is not None else None

            pre_entities = self.gather_pre_entities(
                sentence,
                input_ids,
                scores,
                offset_mapping,
                special_tokens_mask,
                aggregation_strategy,
                word_ids=word_ids,
                word_to_chars_map=word_to_chars_map,
            )
            grouped_entities = self.aggregate(pre_entities, aggregation_strategy)
            # Filter anything that is in self.ignore_labels
            entities = [
                entity
                for entity in grouped_entities
                if entity.get("entity", None) not in ignore_labels
                and entity.get("entity_group", None) not in ignore_labels
            ]
            all_entities.extend(entities)
        num_chunks = len(all_outputs)
        if num_chunks > 1:
            all_entities = self.aggregate_overlapping_entities(all_entities)
        return all_entities

    def aggregate_overlapping_entities(self, entities):
        if len(entities) == 0:
            return entities
        entities = sorted(entities, key=lambda x: x["start"])
        aggregated_entities = []
        previous_entity = entities[0]
        for entity in entities:
            if previous_entity["start"] <= entity["start"] < previous_entity["end"]:
                current_length = entity["end"] - entity["start"]
                previous_length = previous_entity["end"] - previous_entity["start"]
                if current_length > previous_length:
                    previous_entity = entity
                elif current_length == previous_length and entity["score"] > previous_entity["score"]:
                    previous_entity = entity
            else:
                aggregated_entities.append(previous_entity)
                previous_entity = entity
        aggregated_entities.append(previous_entity)
        return aggregated_entities

    def gather_pre_entities(
        self,
        sentence: str,
        input_ids: np.ndarray,
        scores: np.ndarray,
        offset_mapping: Optional[list[tuple[int, int]]],
        special_tokens_mask: np.ndarray,
        aggregation_strategy: AggregationStrategy,
        word_ids: Optional[list[Optional[int]]] = None,
        word_to_chars_map: Optional[list[tuple[int, int]]] = None,
    ) -> list[dict]:
        """Fuse various numpy arrays into dicts with all the information needed for aggregation"""
        pre_entities = []
        for idx, token_scores in enumerate(scores):
            # Filter special_tokens
            if special_tokens_mask[idx]:
                continue

            word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx]))
            if offset_mapping is not None:
                start_ind, end_ind = offset_mapping[idx]
                # When the input was pre-tokenized, offsets are relative to each word: shift them by the
                # character offset of the word within the reconstructed sentence.
                if word_ids is not None and word_to_chars_map is not None:
                    word_index = word_ids[idx]
                    if word_index is not None:
                        start_char, _ = word_to_chars_map[word_index]
                        start_ind += start_char
                        end_ind += start_char
                if not isinstance(start_ind, int) and self.framework == "pt":
                    start_ind = start_ind.item()
                    end_ind = end_ind.item()
                word_ref = sentence[start_ind:end_ind]
                if getattr(self.tokenizer, "_tokenizer", None) and getattr(
                    self.tokenizer._tokenizer.model, "continuing_subword_prefix", None
                ):
                    # This is a BPE, word-aware tokenizer, there is a correct way to fuse tokens
                    is_subword = len(word) != len(word_ref)
                else:
                    # This is a fallback heuristic. It will most likely fail on any kind of text +
                    # punctuation mixtures that will be considered "words". Non-word-aware models cannot
                    # do better than this, unfortunately.
                    if aggregation_strategy in {
                        AggregationStrategy.FIRST,
                        AggregationStrategy.AVERAGE,
                        AggregationStrategy.MAX,
                    }:
                        warnings.warn(
                            "Tokenizer does not support real words, using fallback heuristic",
                            UserWarning,
                        )
                    is_subword = start_ind > 0 and " " not in sentence[start_ind - 1 : start_ind + 1]

                if int(input_ids[idx]) == self.tokenizer.unk_token_id:
                    word = word_ref
                    is_subword = False
            else:
                start_ind = None
                end_ind = None
                is_subword = False

            pre_entity = {
                "word": word,
                "scores": token_scores,
                "start": start_ind,
                "end": end_ind,
                "index": idx,
                "is_subword": is_subword,
            }
            pre_entities.append(pre_entity)
        return pre_entities

    def aggregate(self, pre_entities: list[dict], aggregation_strategy: AggregationStrategy) -> list[dict]:
        if aggregation_strategy in {AggregationStrategy.NONE, AggregationStrategy.SIMPLE}:
            entities = []
            for pre_entity in pre_entities:
                entity_idx = pre_entity["scores"].argmax()
                score = pre_entity["scores"][entity_idx]
                entity = {
                    "entity": self.model.config.id2label[entity_idx],
                    "score": score,
                    "index": pre_entity["index"],
                    "word": pre_entity["word"],
                    "start": pre_entity["start"],
                    "end": pre_entity["end"],
                }
                entities.append(entity)
        else:
            entities = self.aggregate_words(pre_entities, aggregation_strategy)

        if aggregation_strategy == AggregationStrategy.NONE:
            return entities
        return self.group_entities(entities)

    def aggregate_word(self, entities: list[dict], aggregation_strategy: AggregationStrategy) -> dict:
        word = self.tokenizer.convert_tokens_to_string([entity["word"] for entity in entities])
        if aggregation_strategy == AggregationStrategy.FIRST:
            scores = entities[0]["scores"]
            idx = scores.argmax()
            score = scores[idx]
            entity = self.model.config.id2label[idx]
        elif aggregation_strategy == AggregationStrategy.MAX:
            max_entity = max(entities, key=lambda entity: entity["scores"].max())
            scores = max_entity["scores"]
            idx = scores.argmax()
            score = scores[idx]
            entity = self.model.config.id2label[idx]
        elif aggregation_strategy == AggregationStrategy.AVERAGE:
            scores = np.stack([entity["scores"] for entity in entities])
            average_scores = np.nanmean(scores, axis=0)
            entity_idx = average_scores.argmax()
            entity = self.model.config.id2label[entity_idx]
            score = average_scores[entity_idx]
        else:
            raise ValueError("Invalid aggregation_strategy")
        new_entity = {
            "entity": entity,
            "score": score,
            "word": word,
            "start": entities[0]["start"],
            "end": entities[-1]["end"],
        }
        return new_entity

    def aggregate_words(self, entities: list[dict], aggregation_strategy: AggregationStrategy) -> list[dict]:
        """
        Override tokens from a given word that disagree to force agreement on word boundaries.

        Example: micro|soft| com|pany| B-ENT I-NAME I-ENT I-ENT will be rewritten with first strategy as microsoft|
        company| B-ENT I-ENT
        """
        if aggregation_strategy in {
            AggregationStrategy.NONE,
            AggregationStrategy.SIMPLE,
        }:
            raise ValueError("NONE and SIMPLE strategies are invalid for word aggregation")

        word_entities = []
        word_group = None
        for entity in entities:
            if word_group is None:
                word_group = [entity]
            elif entity["is_subword"]:
                word_group.append(entity)
            else:
                word_entities.append(self.aggregate_word(word_group, aggregation_strategy))
                word_group = [entity]
        # Last item
        if word_group is not None:
            word_entities.append(self.aggregate_word(word_group, aggregation_strategy))
        return word_entities

    def group_sub_entities(self, entities: list[dict]) -> dict:
        """
        Group together the adjacent tokens with the same entity predicted.

        Args:
            entities (`dict`): The entities predicted by the pipeline.
        """
        # Get the first entity in the entity group
        entity = entities[0]["entity"].split("-", 1)[-1]
        scores = np.nanmean([entity["score"] for entity in entities])
        tokens = [entity["word"] for entity in entities]

        entity_group = {
            "entity_group": entity,
            "score": np.mean(scores),
            "word": self.tokenizer.convert_tokens_to_string(tokens),
            "start": entities[0]["start"],
            "end": entities[-1]["end"],
        }
        return entity_group

    def get_tag(self, entity_name: str) -> tuple[str, str]:
        if entity_name.startswith("B-"):
            bi = "B"
            tag = entity_name[2:]
        elif entity_name.startswith("I-"):
            bi = "I"
            tag = entity_name[2:]
        else:
            # It's not in B-, I- format
            # Default to I- for continuation.
            bi = "I"
            tag = entity_name
        return bi, tag

    def group_entities(self, entities: list[dict]) -> list[dict]:
        """
        Find and group together the adjacent tokens with the same entity predicted.

        Args:
            entities (`dict`): The entities predicted by the pipeline.
        """

        entity_groups = []
        entity_group_disagg = []

        for entity in entities:
            if not entity_group_disagg:
                entity_group_disagg.append(entity)
                continue

            # If the current entity is similar and adjacent to the previous entity,
            # append it to the disaggregated entity group.
            # The split is meant to account for the "B" and "I" prefixes.
            # Shouldn't merge if both entities are B-type.
            bi, tag = self.get_tag(entity["entity"])
            last_bi, last_tag = self.get_tag(entity_group_disagg[-1]["entity"])

            if tag == last_tag and bi != "B":
                # Modify subword type to be previous_type
                entity_group_disagg.append(entity)
            else:
                # If the current entity is different from the previous entity
                # aggregate the disaggregated entity group
                entity_groups.append(self.group_sub_entities(entity_group_disagg))
                entity_group_disagg = [entity]
        if entity_group_disagg:
            # it's the last entity, add it to the entity groups
            entity_groups.append(self.group_sub_entities(entity_group_disagg))

        return entity_groups


NerPipeline = TokenClassificationPipeline
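

# Minimal end-to-end usage sketch (illustrative only; the checkpoint name is an assumption and the
# exact scores/spans depend on the model):
#
#   from transformers import pipeline
#
#   ner = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="first")
#   ner("Hugging Face is based in New York City")
#   # -> [{"entity_group": ..., "score": ..., "word": ..., "start": ..., "end": ...}, ...]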