
     hD              
          d dl mZ d dlmZmZ ddlmZmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZ  G d d          Z G d d	e          Z G d
 de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z  G d de          Z! ed          dee"         dee"         de#fd            Z$ ed          	 d&d!e"d"e%d#e#de%fd$            Z&d%S )'    )	lru_cache)ListOptional   )COMMON_SAFE_ASCII_CHARACTERSUNICODE_SECONDARY_RANGE_KEYWORD)is_accentuatedis_asciiis_case_variableis_cjkis_emoticon	is_hangulis_hiraganais_katakanais_latinis_punctuationis_separator	is_symbolis_thaiis_unprintableremove_accentunicode_rangec                   V    e Zd ZdZdedefdZdeddfdZd	dZe	de
fd            ZdS )
MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    	characterreturnc                     t           )z@
        Determine if given character should be fed in.
        NotImplementedErrorselfr   s     Q/var/www/html/Sam_Eipo/venv/lib/python3.11/site-packages/charset_normalizer/md.pyeligiblezMessDetectorPlugin.eligible   
     "!    Nc                     t           )z
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        r   r    s     r"   feedzMessDetectorPlugin.feed%   s
    
 "!r%   c                     t           )zB
        Permit to reset the plugin to the initial state.
        r   r!   s    r"   resetzMessDetectorPlugin.reset,   r$   r%   c                     t           )z
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        r   r)   s    r"   ratiozMessDetectorPlugin.ratio2   s
     "!r%   r   N)__name__
__module____qualname____doc__strboolr#   r'   r*   propertyfloatr,    r%   r"   r   r      s         
"# "$ " " " ""c "d " " " "" " " " "u " " " X" " "r%   r   c                   Z    e Zd Zd	dZdedefdZdeddfdZd	dZe	de
fd            ZdS )
 TooManySymbolOrPunctuationPluginr   Nc                 L    d| _         d| _        d| _        d | _        d| _        d S )Nr   F)_punctuation_count_symbol_count_character_count_last_printable_char_frenzy_symbol_in_wordr)   s    r"   __init__z)TooManySymbolOrPunctuationPlugin.__init__<   s0    '("#%&37!,1###r%   r   c                 *    |                                 S Nisprintabler    s     r"   r#   z)TooManySymbolOrPunctuationPlugin.eligibleD       $$&&&r%   c                 (   | xj         dz  c_         || j        k    ro|t          vrft          |          r| xj        dz  c_        nF|                                du r0t          |          r!t          |          du r| xj        dz  c_        || _        d S )Nr   F   )	r<   r=   r   r   r:   isdigitr   r   r;   r    s     r"   r'   z%TooManySymbolOrPunctuationPlugin.feedG   s    " 222!===i(( (''1,'''!!##u,,i(( -	**e33""a'""$-!!!r%   c                 0    d| _         d| _        d| _        d S Nr   )r:   r<   r;   r)   s    r"   r*   z&TooManySymbolOrPunctuationPlugin.resetY   s     "# !r%   c                 ^    | j         dk    rdS | j        | j        z   | j         z  }|dk    r|ndS )Nr           g333333?)r<   r:   r;   )r!   ratio_of_punctuations     r"   r,   z&TooManySymbolOrPunctuationPlugin.ratio^   sK     A%%3 #d&88!'" (<s'B'B##Kr%   r-   r.   r/   r0   r?   r2   r3   r#   r'   r*   r4   r5   r,   r6   r%   r"   r8   r8   ;   s        2 2 2 2'# '$ ' ' ' '.c .d . . . .$   
 Lu L L L XL L Lr%   r8   c                   Z    e Zd Zd	dZdedefdZdeddfdZd	dZe	de
fd            ZdS )
TooManyAccentuatedPluginr   Nc                 "    d| _         d| _        d S rI   r<   _accentuated_countr)   s    r"   r?   z!TooManyAccentuatedPlugin.__init__k   s    %&'(r%   r   c                 *    |                                 S rA   )isalphar    s     r"   r#   z!TooManyAccentuatedPlugin.eligibleo   s      """r%   c                 h    | xj         dz  c_         t          |          r| xj        dz  c_        d S d S Nr   )r<   r	   rR   r    s     r"   r'   zTooManyAccentuatedPlugin.feedr   sJ    ")$$ 	)##q(####	) 	)r%   c                 "    d| _         d| _        d S rI   rQ   r)   s    r"   r*   zTooManyAccentuatedPlugin.resetx   s     !"#r%   c                 N    | j         dk    rdS | j        | j         z  }|dk    r|ndS )Nr   rK   gffffff?rQ   )r!   ratio_of_accentuations     r"   r,   zTooManyAccentuatedPlugin.ratio|   s<     A%%3'+'>AV'V(=(E(E$$3Nr%   r-   rM   r6   r%   r"   rO   rO   j   s        ) ) ) )## #$ # # # #)c )d ) ) ) )$ $ $ $ Ou O O O XO O Or%   rO   c                   Z    e Zd Zd	dZdedefdZdeddfdZd	dZe	de
fd            ZdS )
UnprintablePluginr   Nc                 "    d| _         d| _        d S rI   )_unprintable_countr<   r)   s    r"   r?   zUnprintablePlugin.__init__   s    '(%&r%   r   c                     dS NTr6   r    s     r"   r#   zUnprintablePlugin.eligible       tr%   c                 d    t          |          r| xj        dz  c_        | xj        dz  c_        d S rV   )r   r]   r<   r    s     r"   r'   zUnprintablePlugin.feed   s@    )$$ 	)##q(##"r%   c                     d| _         d S rI   )r]   r)   s    r"   r*   zUnprintablePlugin.reset   s    "#r%   c                 @    | j         dk    rdS | j        dz  | j         z  S )Nr   rK      )r<   r]   r)   s    r"   r,   zUnprintablePlugin.ratio   s+     A%%3'!+t/DDDr%   r-   rM   r6   r%   r"   r[   r[      s        ' ' ' '# $    #c #d # # # #
$ $ $ $ Eu E E E XE E Er%   r[   c                   Z    e Zd Zd	dZdedefdZdeddfdZd	dZe	de
fd            ZdS )
SuspiciousDuplicateAccentPluginr   Nc                 0    d| _         d| _        d | _        d S rI   _successive_countr<   _last_latin_characterr)   s    r"   r?   z(SuspiciousDuplicateAccentPlugin.__init__   s     &'%&48"""r%   r   c                 H    |                                 ot          |          S rA   )rT   r   r    s     r"   r#   z(SuspiciousDuplicateAccentPlugin.eligible   s!      "":x	':'::r%   c                 l   | xj         dz  c_         | j        t          |          rt          | j                  rr|                                r)| j                                        r| xj        dz  c_        t          |          t          | j                  k    r| xj        dz  c_        || _        d S rV   )r<   rj   r	   isupperri   r   r    s     r"   r'   z$SuspiciousDuplicateAccentPlugin.feed   s    "&2y)) 3t9:: 3   "" ,t'A'I'I'K'K ,&&!+&&Y''=9S+T+TTT&&!+&&%."""r%   c                 0    d| _         d| _        d | _        d S rI   rh   r)   s    r"   r*   z%SuspiciousDuplicateAccentPlugin.reset   s     !" !%)"""r%   c                 @    | j         dk    rdS | j        dz  | j         z  S )Nr   rK   rF   )r<   ri   r)   s    r"   r,   z%SuspiciousDuplicateAccentPlugin.ratio   s+     A%%3&*d.CCCr%   r-   rM   r6   r%   r"   rf   rf      s        9 9 9 9;# ;$ ; ; ; ;/c /d / / / /* * * *
 Du D D D XD D Dr%   rf   c                   Z    e Zd Zd	dZdedefdZdeddfdZd	dZe	de
fd            ZdS )
SuspiciousRanger   Nc                 0    d| _         d| _        d | _        d S rI   )"_suspicious_successive_range_countr<   _last_printable_seenr)   s    r"   r?   zSuspiciousRange.__init__   s     78/%&37!!!r%   r   c                 *    |                                 S rA   rB   r    s     r"   r#   zSuspiciousRange.eligible   rD   r%   c                 D   | xj         dz  c_         |                                st          |          s	|t          v r	d | _        d S | j        	|| _        d S t          | j                  }t          |          }t          ||          r| xj        dz  c_        || _        d S rV   )r<   isspacer   r   rt   r    is_suspiciously_successive_rangers   )r!   r   unicode_range_aunicode_range_bs       r"   r'   zSuspiciousRange.feed   s    " 	i((	 888(,D%F$,(1D%F)6t7P)Q)Q)6y)A)A+O_MM 	933q833$-!!!r%   c                 0    d| _         d| _        d | _        d S rI   )r<   rs   rt   r)   s    r"   r*   zSuspiciousRange.reset   s      !23/$(!!!r%   c                 T    | j         dk    rdS | j        dz  | j         z  }|dk     rdS |S )Nr   rK   rF   g?)r<   rs   )r!   ratio_of_suspicious_range_usages     r"   r,   zSuspiciousRange.ratio   sH     A%%3 3a7!2"' +S003..r%   r-   rM   r6   r%   r"   rq   rq      s        8 8 8 8
'# '$ ' ' ' '.c .d . . . ..) ) ) )
 /u / / / X/ / /r%   rq   c                   Z    e Zd Zd	dZdedefdZdeddfdZd	dZe	de
fd            ZdS )
SuperWeirdWordPluginr   Nc                     d| _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d S )Nr   F )	_word_count_bad_word_count_foreign_long_count_is_current_word_bad_foreign_long_watchr<   _bad_character_count_buffer_buffer_accent_countr)   s    r"   r?   zSuperWeirdWordPlugin.__init__   sO     !$%() */!). %&)*!)*!!!r%   r   c                     dS r_   r6   r    s     r"   r#   zSuperWeirdWordPlugin.eligible  r`   r%   c                    |                                 r| xj        |z  c_        t          |          r| xj        dz  c_        | j        du r|t          |          du st          |          r\t          |          du rKt          |          du r:t          |          du r)t          |          du rt          |          du rd| _        d S | j        sd S |                                st          |          st          |          r"| j        r| xj        dz  c_        t          | j                  }| xj        |z  c_        |dk    re| j        |z  dk    rd| _        t          | j        d                   r6| j        d                                         r| xj        dz  c_        d| _        |dk    r| j        r| xj        dz  c_        d| _        | j        r9| xj        dz  c_        | xj        t          | j                  z  c_        d| _        d| _        d| _        d	| _        d S |d
vr>|                                du r*t/          |          rd| _        | xj        |z  c_        d S d S d S d S )Nr   FT   g(\?   r   r   >   _-<=>|~)rT   r   r	   r   r   r   r   r   r   r   r   rw   r   r   r   lenr<   r   rm   r   r   r   rG   r   )r!   r   buffer_lengths      r"   r'   zSuperWeirdWordPlugin.feed  s    	LLI%LLi(( /))Q.))(E11i((E11^I5N5N19%%..i((E11	**e33	**e33I&&%//+/(F| 	F"	&#1)#<#<"	&@LY@W@W"	&l"	& !!$T\!2!2M!!]2!!!!,}<tCC04D- "$,r"233 5R8H8P8P8R8R 5,,1,,04D-""t'?"((A-((,0)( 2$$)$$))S->->>)),1)',D$DL()D%%%@@@!!##u,,)$$ - )-D%LLI%LLLL A@,,,,r%   c                 v    d| _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d S )Nr   Fr   )r   r   r   r   r   r<   r   r   r)   s    r"   r*   zSuperWeirdWordPlugin.reset=  sG    $)!#(   !$%!#$   r%   c                 P    | j         dk    r| j        dk    rdS | j        | j        z  S )N
   r   rK   )r   r   r   r<   r)   s    r"   r,   zSuperWeirdWordPlugin.ratioG  s3    r!!d&>!&C&C3(4+@@@r%   r-   rM   r6   r%   r"   r   r      s        + + + +# $    4&c 4&d 4& 4& 4& 4&l% % % % Au A A A XA A Ar%   r   c                   ^    e Zd ZdZd
dZdedefdZdeddfdZd
dZ	e
defd	            ZdS )CjkInvalidStopPluginu   
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
    can be easily detected. Searching for the overuse of '丅' and '丄'.
    r   Nc                 "    d| _         d| _        d S rI   _wrong_stop_count_cjk_character_countr)   s    r"   r?   zCjkInvalidStopPlugin.__init__U  s    &')*!!!r%   r   c                     dS r_   r6   r    s     r"   r#   zCjkInvalidStopPlugin.eligibleY  r`   r%   c                 t    |dv r| xj         dz  c_         d S t          |          r| xj        dz  c_        d S d S )N>      丄   丅r   )r   r   r   r    s     r"   r'   zCjkInvalidStopPlugin.feed\  sZ    &&""a'""F) 	+%%*%%%%	+ 	+r%   c                 "    d| _         d| _        d S rI   r   r)   s    r"   r*   zCjkInvalidStopPlugin.resetc  s    !"$%!!!r%   c                 :    | j         dk     rdS | j        | j         z  S )N   rK   )r   r   r)   s    r"   r,   zCjkInvalidStopPlugin.ratiog  s&    $r))3%(AAAr%   r-   )r.   r/   r0   r1   r?   r2   r3   r#   r'   r*   r4   r5   r,   r6   r%   r"   r   r   O  s         
+ + + +# $    +c +d + + + +& & & & Bu B B B XB B Br%   r   c                   Z    e Zd Zd	dZdedefdZdeddfdZd	dZe	de
fd            ZdS )
ArchaicUpperLowerPluginr   Nc                 h    d| _         d| _        d| _        d| _        d| _        d | _        d| _        d S )NFr   T)_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalr<   _last_alpha_seen_current_ascii_onlyr)   s    r"   r?   z ArchaicUpperLowerPlugin.__init__o  s?    	45,23*890%&/3)-   r%   r   c                     dS r_   r6   r    s     r"   r#   z ArchaicUpperLowerPlugin.eligible|  r`   r%   c                    |                                 ot          |          }|du }|r| j        dk    rt| j        dk    r4|                                du r| j        du r| xj        | j        z  c_        d| _        d| _        d | _        d| _        | xj	        dz  c_	        d| _        d S | j        du rt          |          du rd| _        | j        |                                r| j                                        s-|                                rB| j                                        r)| j        du r| xj        dz  c_        d| _        nd| _        nd| _        | xj	        dz  c_	        | xj        dz  c_        || _        d S )NFr   @   r   TrF   )rT   r   r   rG   r   r   r   r   r   r<   r
   rm   islower)r!   r   is_concerned	chunk_seps       r"   r'   zArchaicUpperLowerPlugin.feed  s    ((**J/?	/J/J E)	 	=AA4::%%''500,5588688 23D.34D0$(D!DI!!Q&!!'+D$F#t++0C0Cu0L0L',D$ ,!!## 	"(=(E(E(G(G 	"!!##	"(,(=(E(E(G(G	" 9$$66!;66 %DII $DII!	",,1,, )r%   c                 h    d| _         d| _        d| _        d| _        d | _        d| _        d| _        d S )Nr   FT)r<   r   r   r   r   r   r   r)   s    r"   r*   zArchaicUpperLowerPlugin.reset  s?     !/0,-.*340 $	#'   r%   c                 :    | j         dk    rdS | j        | j         z  S )Nr   rK   )r<   r   r)   s    r"   r,   zArchaicUpperLowerPlugin.ratio  s&     A%%37$:OOOr%   r-   rM   r6   r%   r"   r   r   n  s        . . . .# $    (*c (*d (* (* (* (*T( ( ( ( Pu P P P XP P Pr%   r      )maxsizery   rz   r   c                    | |dS | |k    rdS d| v rd|v rdS d| v sd|v rdS d| v sd|v r
d| v sd|v rdS |                      d          |                     d          }}|D ]}|t          v r||v r dS | dv |dv }}|s|r
d	| v sd	|v rdS |r|rdS d
| v sd
|v rd	| v sd	|v rdS | dk    s|dk    rdS d	| v sd	|v s| dv r|dv rd| v sd|v rdS d| v sd|v rdS dS )za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    NTFLatin	Emoticons	Combining )HiraganaKatakanaCJKHangulzBasic Latin)r   r   PunctuationForms)splitr   )ry   rz   keywords_range_akeywords_range_belrange_a_jp_charsrange_b_jp_charss          r"   rx   rx     s    /"9t/))u/!!g&@&@uo%%)G)Gu 	?""g&@&@&&+*H*Hu)8)>)>* *S!! '   000!!!55 "
 	
	

 	33 ' 	 ,   E_$<$<u , u?""h/&A&AO##u'?'?5m++-/O/O5 	  E_$<$<333777O++}/O/O5o%%O)C)C54r%   i   皙?Fdecoded_sequencemaximum_thresholddebugc                    d t                                           D             }t          |           dz   }d}|dk     rd}n|dk    rd}nd}t          | d	z   t	          |                    D ]m\  }}|D ],}	|	                    |          r|	                    |           -|d
k    r	||z  d
k    s	||dz
  k    r!t          d |D                       }||k    r nn|r|D ]}
t          |
j	        |
j
                   t          |d          S )zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    c                 "    g | ]} |            S r6   r6   ).0md_classs     r"   
<listcomp>zmess_ratio.<locals>.<listcomp>	  s+     + + +

+ + +r%   r   rK   i       r   r      
r   c              3   $   K   | ]}|j         V  d S rA   )r,   )r   dts     r"   	<genexpr>zmess_ratio.<locals>.<genexpr>   s$      !?!?r"(!?!?!?!?!?!?r%      )r   __subclasses__r   zipranger#   r'   sumprint	__class__r,   round)r   r   r   	detectorslengthmean_mess_ratio!intermediary_mean_mess_ratio_calcr   indexdetectorr   s              r"   
mess_ratior     sh   + +#5#D#D#F#F+ + +I &''!+F O||13))	4,.)),/) 04 7vGG  	5! 	) 	)H  ++ )i((( AII%"CCqHHfqj  !!?!?Y!?!?!???O"333 * 	* 	*B",))))!$$$r%   N)r   F)'	functoolsr   typingr   r   constantr   r   utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r8   rO   r[   rf   rq   r   r   r   r2   r3   rx   r5   r   r6   r%   r"   <module>r      s         ! ! ! ! ! ! ! ! S S S S S S S S                                   (" " " " " " " "D,L ,L ,L ,L ,L'9 ,L ,L ,L^O O O O O1 O O O4E E E E E* E E E0"D "D "D "D "D&8 "D "D "DJ1/ 1/ 1/ 1/ 1/( 1/ 1/ 1/hWA WA WA WA WA- WA WA WAtB B B B B- B B B>IP IP IP IP IP0 IP IP IPX 4Cc]C5=c]C	C C C CL 4IN'% '%'%.3'%BF'%
'% '% '% '% '% '%r%   