
     ho3                        d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZmZ dd	lmZmZ dd
lmZ ddlmZmZmZ  G d d          Z G d d          Zeee f         Z!ee!         Z" G d d          Z#dS )    N)Counter)aliases)sha256)dumps)sub)Anyr   DictIteratorListOptionalTupleUnion   )NOT_PRINTABLE_PATTERNTOO_BIG_SEQUENCE)
mess_ratio)	iana_nameis_multi_byte_encodingunicode_rangec                      e Zd Z	 d*dededededddee         fd	Zd
e	defdZ
d
e	defdZedefd            Zedefd            Zedefd            Zedee         fd            ZdefdZdefdZd+dZedefd            Zedee         fd            Zedefd            Zedefd            Zedee         fd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Z eded          fd             Z!edefd!            Z"edee         fd"            Z#edee         fd#            Z$d,d$Z%d,d%Z&d-d'edefd(Z'edefd)            Z(dS ).CharsetMatchNpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadc                     || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        d S )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string)selfr   r   r   r   r   r   s          U/var/www/html/Sam_Eipo/venv/lib/python3.11/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   s[      '.'6,5%348+-,/"04/3&5    otherreturnc                     t          |t                    sGt          d                    t	          |j                  t	          | j                                      | j        |j        k    o| j        |j        k    S )Nz&__eq__ cannot be invoked on {} and {}.)
isinstancer   	TypeErrorformatstr	__class__encodingfingerprintr,   r0   s     r-   __eq__zCharsetMatch.__eq__1   ss    %.. 	8??((#dn*=*=   
 }.X43CuGX3XXr/   c                 N   t          |t                    st          t          | j        |j        z
            }t          | j        |j        z
            }|dk     r<|dk    r6|dk    r | j        |j        k    r| j        |j        k    S | j        |j        k    S | j        |j        k     S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?r    )r3   r   
ValueErrorabschaos	coherencemulti_byte_usage)r,   r0   chaos_differencecoherence_differences       r-   __lt__zCharsetMatch.__lt__:   s     %.. 	"%dj5;&>"?"?&)$.5?*J&K&K d""';d'B'B3&&4>U_+L+L,u/EEE>EO33zEK''r/   c                 j    dt          t          |                     t          | j                  z  z
  S )N      ?)lenr6   rawr,   s    r-   rA   zCharsetMatch.multi_byte_usageM   s&    ST^^c$(mm333r/   c                 p    t          j        dt                     t          t	          |           d          S )z
        Check once again chaos in decoded text, except this time, with full content.
        Use with caution, this can be very slow.
        Notice: Will be removed in 3.0
        z=chaos_secondary_pass is deprecated and will be removed in 3.0rF   )warningswarnDeprecationWarningr   r6   rI   s    r-   chaos_secondary_passz!CharsetMatch.chaos_secondary_passQ   s6     	K	
 	
 	
 #d))S)))r/   c                 :    t          j        dt                     dS )zy
        Coherence ratio on the first non-latin language detected if ANY.
        Notice: Will be removed in 3.0
        z<coherence_non_latin is deprecated and will be removed in 3.0r    )rK   rL   rM   rI   s    r-   coherence_non_latinz CharsetMatch.coherence_non_latin^   s%     	J	
 	
 	
 sr/   c                     t          j        dt                     t          t          dt          |                                                     }t          |                                          S )z_
        Word counter instance on decoded text.
        Notice: Will be removed in 3.0
        z2w_counter is deprecated and will be removed in 3.0 )	rK   rL   rM   r   r   r6   lowerr   split)r,   string_printable_onlys     r-   	w_counterzCharsetMatch.w_counterj   s^     	@BT	
 	
 	
 !$$93D		@Q@Q R R,2244555r/   c                 ^    | j          t          | j        | j        d          | _         | j         S )Nstrict)r+   r6   r!   r"   rI   s    r-   __str__zCharsetMatch.__str__x   s)    <t}dnhGGDL|r/   c                 B    d                     | j        | j                  S )Nz<CharsetMatch '{}' bytes({})>)r5   r8   r9   rI   s    r-   __repr__zCharsetMatch.__repr__~   s    .55dmTEUVVVr/   c                     t          |t                    r|| k    r't          d                    |j                            d |_        | j                            |           d S )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r3   r   r=   r5   r7   r+   r'   appendr:   s     r-   add_submatchzCharsetMatch.add_submatch   sk    %.. 	%4--MTTO    E"""""r/   c                     | j         S N)r"   rI   s    r-   r8   zCharsetMatch.encoding   s
    ~r/   c                     g }t          j                    D ]F\  }}| j        |k    r|                    |           &| j        |k    r|                    |           G|S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr8   r]   )r,   also_known_asups       r-   encoding_aliaseszCharsetMatch.encoding_aliases   sn    
 $&MOO 	( 	(DAq}!!$$Q''''!##$$Q'''r/   c                     | j         S r`   r%   rI   s    r-   bomzCharsetMatch.bom       ##r/   c                     | j         S r`   rh   rI   s    r-   byte_order_markzCharsetMatch.byte_order_mark   rj   r/   c                 $    d | j         D             S )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        c                     g | ]
}|d          S )r    ).0es     r-   
<listcomp>z*CharsetMatch.languages.<locals>.<listcomp>   s    ...!...r/   r$   rI   s    r-   r   zCharsetMatch.languages   s     /.do....r/   c                    | j         shd| j        v rdS ddlm}m} t          | j                  r || j                  n || j                  }t          |          dk    sd|v rdS |d         S | j         d         d         S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r$   could_be_from_charsetcharset_normalizer.cdrw   rx   r   r8   rG   )r,   rw   rx   r   s       r-   languagezCharsetMatch.language   s      	  $444 y XWWWWWWW *$-887%%dm444''66  9~~""my&@&@ yQ<q!!$$r/   c                     | j         S r`   )r#   rI   s    r-   r?   zCharsetMatch.chaos   s    $$r/   c                 :    | j         sdS | j         d         d         S )Nr    r   r   rs   rI   s    r-   r@   zCharsetMatch.coherence   s#     	3q!!$$r/   c                 4    t          | j        dz  d          S Nd      )ndigits)roundr?   rI   s    r-   percent_chaoszCharsetMatch.percent_chaos   s    TZ#%q1111r/   c                 4    t          | j        dz  d          S r   )r   r@   rI   s    r-   percent_coherencezCharsetMatch.percent_coherence   s    T^c)15555r/   c                     | j         S )z+
        Original untouched bytes.
        )r!   rI   s    r-   rH   zCharsetMatch.raw   s    
 }r/   c                     | j         S r`   )r'   rI   s    r-   submatchzCharsetMatch.submatch   s
    |r/   c                 2    t          | j                  dk    S Nr   )rG   r'   rI   s    r-   has_submatchzCharsetMatch.has_submatch   s    4<  1$$r/   c                     | j         | j         S d t          |           D             }t          t          d |D                                 | _         | j         S )Nc                 ,    g | ]}t          |          S ro   )r   )rp   chars     r-   rr   z*CharsetMatch.alphabets.<locals>.<listcomp>   s-     0
 0
 0
$(M$0
 0
 0
r/   c                     h | ]}||S ro   ro   )rp   rs     r-   	<setcomp>z)CharsetMatch.alphabets.<locals>.<setcomp>   s    +L+L+L!!+LA+L+L+Lr/   )r&   r6   sortedlist)r,   detected_rangess     r-   	alphabetszCharsetMatch.alphabets   sj    +''0
 0
,/II0
 0
 0
  &d+L+L+L+L+L&M&MNN##r/   c                 6    | j         gd | j        D             z   S )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        c                     g | ]	}|j         
S ro   )r8   )rp   ms     r-   rr   z6CharsetMatch.could_be_from_charset.<locals>.<listcomp>   s    "D"D"D!1:"D"D"Dr/   )r"   r'   rI   s    r-   rz   z"CharsetMatch.could_be_from_charset   s%     "D"Dt|"D"D"DDDr/   c                     | S z>
        Kept for BC reasons. Will be removed in 3.0.
        ro   rI   s    r-   firstzCharsetMatch.first   	     r/   c                     | S r   ro   rI   s    r-   bestzCharsetMatch.best  r   r/   utf_8r8   c                     | j         | j         |k    r/|| _         t          |                               |d          | _        | j        S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        Nreplace)r*   r6   encoder)   )r,   r8   s     r-   outputzCharsetMatch.output  sI    
  (D,AX,M,M$,D!#&t99#3#3Hi#H#HD ##r/   c                 h    t          |                                                                           S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   r   	hexdigestrI   s    r-   r9   zCharsetMatch.fingerprint  s&    
 dkkmm$$..000r/   r`   )r0   r   r1   N)r1   r   )r   ))__name__
__module____qualname__bytesr6   floatboolr   r.   objectr;   rD   propertyrA   rN   rP   TypeCounterrV   rY   r[   r^   r8   r   rf   ri   rl   r   r|   r?   r@   r   r   rH   r   r   r   rz   r   r   r   r9   ro   r/   r-   r   r      s        *.6 66 6 	6
 6 &6 "#6 6 6 62YF Yt Y Y Y Y(F (t ( ( ( (& 4% 4 4 4 X4 
*e 
* 
* 
* X
* 	U 	 	 	 X	 6;s+ 6 6 6 X6    W# W W W W	# 	# 	# 	# #    X 
$s) 
 
 
 X
 $T $ $ $ X$ $ $ $ $ X$ /49 / / / X/ %# % % % X%6 %u % % % X% %5 % % % X%
 2u 2 2 2 X2 65 6 6 6 X6 U    X $~.    X %d % % % X% 	$49 	$ 	$ 	$ X	$ EtCy E E E XE      	$ 	$s 	$ 	$ 	$ 	$ 	$ 1S 1 1 1 X1 1 1r/   r   c                       e Zd ZdZddeee                  fdZdee         fdZ	de
eef         defdZdefd	Zdefd
ZdeddfdZded         fdZded         fdZdS )CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultsc                 6    |rt          |          ng | _        d S r`   )r   _results)r,   r   s     r-   r.   zCharsetMatches.__init__$  s    ?F,NF7OOOBr/   r1   c              #   $   K   | j         E d {V  d S r`   r   rI   s    r-   __iter__zCharsetMatches.__iter__'  s&      =         r/   itemc                     t          |t                    r| j        |         S t          |t                    r't	          |d          }| j        D ]}||j        v r|c S t          )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r3   intr   r6   r   rz   KeyError)r,   r   results      r-   __getitem__zCharsetMatches.__getitem__*  sv    
 dC   	'=&&dC   	"T5))D- " "6777!MMM 8r/   c                 *    t          | j                  S r`   rG   r   rI   s    r-   __len__zCharsetMatches.__len__8  s    4=!!!r/   c                 2    t          | j                  dk    S r   r   rI   s    r-   __bool__zCharsetMatches.__bool__;  s    4=!!A%%r/   c                    t          |t                    s4t          d                    t	          |j                                      t          |j                  t          k    rB| j	        D ]:}|j
        |j
        k    r(|j        |j        k    r|                    |            dS ;| j	                            |           t          | j	                  | _	        dS )z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r3   r   r=   r5   r6   r7   rG   rH   r   r   r9   r?   r^   r]   r   )r,   r   matchs      r-   r]   zCharsetMatches.append>  s    
 $-- 	?FF''    tx==,,,  $(888U[DJ=V=V&&t,,,FFT"""t}--r/   r   c                 .    | j         sdS | j         d         S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   rI   s    r-   r   zCharsetMatches.bestR  s      } 	4}Qr/   c                 *    |                                  S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   rI   s    r-   r   zCharsetMatches.firstZ  s     yy{{r/   r`   )r   r   r   __doc__r   r   r   r.   r
   r   r   r   r6   r   r   r   r   r]   r   r   ro   r/   r-   r   r     s0        
O Ol); < O O O O!(<0 ! ! ! !c3h L    " " " " "&$ & & & &.< .D . . . .( h~.        x/      r/   r   c                       e Zd Zdedee         dee         dee         dedee         deded	ed
ee         defdZe	de
eef         fd            ZdefdZdS )CliDetectionResultpathr8   rf   alternative_encodingsr|   r   r   r?   r@   unicode_pathis_preferredc                     || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        d S r`   )r   r   r8   rf   r   r|   r   r   r?   r@   r   )r,   r   r8   rf   r   r|   r   r   r?   r@   r   r   s               r-   r.   zCliDetectionResult.__init__f  s\     	+7'/+;0E"%$-$2!
 )".r/   r1   c                     | j         | j        | j        | j        | j        | j        | j        | j        | j        | j	        | j
        dS )Nr   r8   rf   r   r|   r   r   r?   r@   r   r   r   rI   s    r-   __dict__zCliDetectionResult.__dict__  sO     I $ 5%)%?"1Z - -
 
 	
r/   c                 0    t          | j        dd          S )NT   )ensure_asciiindent)r   r   rI   s    r-   to_jsonzCliDetectionResult.to_json  s    T]a@@@@r/   N)r   r   r   r6   r   r   r   r   r.   r   r	   r   r   r   ro   r/   r-   r   r   e  s        // 3-/ s)	/
  $Cy/ / 9/ / / / sm/ / / / /4 
$sCx. 
 
 
 X
A A A A A A Ar/   r   )$rK   collectionsr   encodings.aliasesr   hashlibr   jsonr   rer   typingr   r   r	   r
   r   r   r   r   constantr   r   mdr   utilsr   r   r   r   r   r6   r   CoherenceMatchr   r   ro   r/   r-   <module>r      s          % % % % % %                  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 > = = = = = = =       C C C C C C C C C CD1 D1 D1 D1 D1 D1 D1 D1N@ @ @ @ @ @ @ @F sEz"' ,A ,A ,A ,A ,A ,A ,A ,A ,A ,Ar/   