
     hJ                        d dl Z d dlZd dlmZ d dlmZmZ d dlmZm	Z	m
Z
mZmZ ddlmZmZmZmZ ddlmZmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZm Z m!Z!m"Z"m#Z#  e j$        d          Z% e j&                    Z'e'(                     e j)        d                     	 	 	 	 	 	 	 d"de*de+de+de,dee
e-                  dee
e-                  de.de.defdZ/	 	 	 	 	 	 	 d"de	de+de+de,dee
e-                  dee
e-                  de.de.defdZ0	 	 	 	 	 	 	 d"ddde+de+de,dee
e-                  dee
e-                  de.de.defd Z1	 	 	 	 	 	 d#ddde+de+de,dee
e-                  dee
e-                  de.defd!Z2dS )$    N)PathLike)basenamesplitext)AnyBinaryIOListOptionalSet   )coherence_ratioencoding_languagesmb_encoding_languagesmerge_coherence_ratios)IANA_SUPPORTEDTOO_BIG_SEQUENCETOO_SMALL_SEQUENCETRACE)
mess_ratio)CharsetMatchCharsetMatches)any_specified_encodingcut_sequence_chunks	iana_nameidentify_sig_or_bomis_cp_similaris_multi_byte_encodingshould_strip_sig_or_bomcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)s      皙?TF	sequencessteps
chunk_size	thresholdcp_isolationcp_exclusionpreemptive_behaviourexplainreturnc                     t          | t          t          f          s/t          d                    t          |                               |rJt          j        }t                              t                     t          
                    t                     t          |           }	|	dk    rt                              d           |rEt                              t                     t          
                    |pt          j                   t#          t%          | dddg d          g          S |At                              t          d	d
                    |                     d |D             }ng }|At                              t          dd
                    |                     d |D             }ng }|	||z  k    r't                              t          d|||	           d}|	}|dk    r|	|z  |k     rt+          |	|z            }t          |           t,          k     }
t          |           t.          k    }|
r4t                              t          d                    |	                     n5|r3t                              t          d                    |	                     g }|rt1          |           nd}|6|                    |           t                              t          d|           t5                      }g }g }d}d}d}t#                      }t7          |           \  }}|D|                    |           t                              t          dt          |          |           |                    d           d|vr|                    d           |t8          z   D ]}|r||vr
|r||v r||v r|                    |           d}||k    }|ot=          |          }|dv r$|s"t                              t          d|           l	 t?          |          }n8# t@          tB          f$ r$ t                              t          d|           Y w xY w	 |rS|du rOtE          |du r| dt+          d                   n#| t          |          t+          d                   |           n,tE          |du r| n| t          |          d         |          }nx# tF          tH          f$ rd}t          |tH                    s/t                              t          d|tE          |                     |                    |           Y d}~d}~ww xY wd}|D ]}tK          ||          rd} n|r$t                              t          d||           tM          |sdnt          |          |	t+          |	|z                      }|o|duot          |          |	k     } | r!t                              t          d|           t+          t          |          dz            }!tO          |!d          }!d}"d}#g }$g }%	 tQ          | ||||||||	  	        D ]Y}&|$                    |&           |%                    tS          |&|                     |%d          |k    r|"dz  }"|"|!k    s|r|du r nZnJ# tF          $ r=}t                              t          d!|tE          |                     |!}"d}#Y d}~nd}~ww xY w|#s|r|s	 | t+          d"          d         *                    |d#$           n\# tF          $ rO}t                              t          d%|tE          |                     |                    |           Y d}~d}~ww xY w|%rtW          |%          t          |%          z  nd}'|'|k    s|"|!k    r}|                    |           t                              t          d&||"tY          |'d'z  d()                     |dd|fv r*|#s(t%          | ||dg |          }(||k    r|(}n|dk    r|(}n|(}t                              t          d*|tY          |'d'z  d()                     |st[          |          })nt]          |          })|)rAt                              t          d+                    |tE          |)                               g }*|dk    rB|$D ]?}&t_          |&d,|)rd-                    |)          nd          }+|*                    |+           @ta          |*          },|,r4t                              t          d.                    |,|                     |                    t%          | ||'||,|                     ||ddfv rt|'d,k     rnt                              d/|           |r9t                              t                     t          
                    |           t#          ||         g          c S ||k    rnt                              d0|           |r9t                              t                     t          
                    |           t#          ||         g          c S t          |          dk    r|s|s|r t                              t          d1           |r6t                              d2|j1                   |                    |           n{|r||r|r|j2        |j2        k    s|0t                              d3           |                    |           n1|r/t                              d4           |                    |           |rDt                              d5|3                                j1        t          |          dz
             nt                              d6           |r9t                              t                     t          
                    |           |S )7ae  
    Given a raw bytes sequence, return the best possibles charset usable to render str objects.
    If there is no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocs of 512o each to assess the mess and coherence of a given sequence.
    And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.

    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
    but never take it for granted. Can improve the performance.

    You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
    purpose.

    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
    By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
    Custom logging format and handler can be set manually.
    z4Expected object of type bytes or bytearray, got: {0}r   z<Encoding detection on empty bytes, assuming utf_8 intention.utf_8g        F Nz`cp_isolation is set. use this flag for debugging purpose. limited list of encoding allowed : %s.z, c                 .    g | ]}t          |d           S Fr   .0cps     R/var/www/html/Sam_Eipo/venv/lib/python3.11/site-packages/charset_normalizer/api.py
<listcomp>zfrom_bytes.<locals>.<listcomp>[   "    DDD	"e,,DDD    zacp_exclusion is set. use this flag for debugging purpose. limited list of encoding excluded : %s.c                 .    g | ]}t          |d           S r/   r0   r1   s     r4   r5   zfrom_bytes.<locals>.<listcomp>f   r6   r7   z^override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.r   z>Trying to detect encoding from a tiny portion of ({}) byte(s).zIUsing lazy str decoding because the payload is quite large, ({}) byte(s).z@Detected declarative mark in sequence. Priority +1 given for %s.zIDetected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.ascii>   utf_16utf_32z[Encoding %s wont be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.z2Encoding %s does not provide an IncrementalDecoderg    A)encodingz9Code page %s does not fit given bytes sequence at ALL. %sTzW%s is deemed too similar to code page %s and was consider unsuited already. Continuing!zpCode page %s is a multi byte encoding table and it appear that at least one character was encoded using n-bytes.      zaLazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %sg     j@strict)errorsz^LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %szc%s was excluded because of initial chaos probing. Gave up %i time(s). Computed mean chaos is %f %%.d      )ndigitsz=%s passed initial chaos probing. Mean measured chaos is %f %%z&{} should target any language(s) of {}g?,z We detected language {} using {}z.Encoding detection: %s is most likely the one.zoEncoding detection: %s is most likely the one as we detected a BOM or SIG within the beginning of the sequence.zONothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.z7Encoding detection: %s will be used as a fallback matchz:Encoding detection: utf_8 will be used as a fallback matchz:Encoding detection: ascii will be used as a fallback matchz]Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.z=Encoding detection: Unable to determine any suitable charset.)4
isinstance	bytearraybytes	TypeErrorformattypeloggerlevel
addHandlerexplain_handlersetLevelr   lendebugremoveHandlerloggingWARNINGr   r   logjoinintr   r   r   appendsetr   r   addr   r   ModuleNotFoundErrorImportErrorstrUnicodeDecodeErrorLookupErrorr   rangemaxr   r   decodesumroundr   r   r   r   r<   fingerprintbest)-r"   r#   r$   r%   r&   r'   r(   r)   previous_logger_levellengthis_too_small_sequenceis_too_large_sequenceprioritized_encodingsspecified_encodingtestedtested_but_hard_failuretested_but_soft_failurefallback_asciifallback_u8fallback_specifiedresultssig_encodingsig_payloadencoding_ianadecoded_payloadbom_or_sig_availablestrip_sig_or_bomis_multi_byte_decoderesimilar_soft_failure_testencoding_soft_failedr_multi_byte_bonusmax_chunk_gave_upearly_stop_countlazy_str_hard_failure	md_chunks	md_ratioschunkmean_mess_ratiofallback_entrytarget_languages	cd_ratioschunk_languagescd_ratios_mergeds-                                                r4   
from_bytesr   #   s<   8 i)U!344 
BIIY 
 
 	
  %+\/***i..F{{STTT 	F  111OO1DW_EEE|IwUBPRSSTUUU

5IIl##		
 	
 	
 ED|DDD

6IIl##		
 	
 	
 ED|DDD*u$%%

l	
 	
 	
 
qyyVe^j00%((
"%i..3E"E"%i..4D"D 


LSS 	
 	
 	
 	
 
 


W^^ 	
 	
 	
 (* .BKy)))t  %$$%7888

N	
 	
 	
 uuF)+)+-1N*.K15,..G 3I > >L+$$\222

W		
 	
 	
   )))+++$$W---.? E< E< 	M== 	M\99F""

=!!!)-%1]%B!5 "
:Q;
 ;
 0009M0JJm  
 	*@*O*O!!#[1 	 	 	JJD  
 H		$ )>%)G)G'500 kD		k**"3{#3#3c$ii#?@*	     #&'500 I"3{#3#3#5#56*	# # # #K0 		 		 		a-- 

O!FF	   $**=999HHHH		 +0!$; 	 	 ],@AA ,0) % 	JJi$	   )?AAs;/?/?
 
 " .t+.O$$v- 	  	JJ-	   "%SWWq[!1!1 1155 ! %!			),$ %
 
     '''  E9!=!=>>>R=I--$)$$(999( :-=-F-FE! 	) 	) 	)JJsA	    1$(!!!!!!	) &	%	 *	

#d))++&--mH-MMMM%   

t!FF	   (..}=== EN!VY#i..!@!@SVi''+;?P+P+P#**=999JJ0 o+Q777   '74F!GGG- H ".}iO" " !$666)7&&"g--%3NN"0K

K/C'333		
 	
 	
 % 	D*<]*K*K4]CC 	JJ8??!3'7#8#8    	 G##" 2 2"13>N X)9 : : :TX# #   11111)<< 	JJ299$m    	$  		
 		
 		
 0'7CCC#%%LL@-    7$$_555 5666!7=#9":;;;;;L((LL1  
  7$$_555 5666!7=#9":;;;;; ) 7||q 	. 	,> 	JJa  
  	+LLI"+   NN-....	++3 4 # 4  +~/III'LLUVVVNN;'''' 	+LLUVVVNN>*** VkLLNN#LL1	
 	
 	
 	
 	TUUU /_----...Ns^   P&&2QQBS!!U2AUU A1Z22
[9<3[44[9,\00
^	:A^^	fpc           
      R    t          |                                 |||||||          S )z
    Same thing than the function from_bytes but using a file pointer that is already ready.
    Will not close the file pointer.
    )r   read)r   r#   r$   r%   r&   r'   r(   r)   s           r4   from_fpr     s6     
			 	 	r7   pathzPathLike[Any]c                     t          | d          5 }t          ||||||||          cddd           S # 1 swxY w Y   dS )z
    Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
    Can raise IOError.
    rbN)openr   )	r   r#   r$   r%   r&   r'   r(   r)   r   s	            r4   	from_pathr     s     
dD		 

R 	
 	


 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

s   488c           
         t          j        dt                     t          | ||||||          }t	          |           }t          t          |                    }	t          |          dk    r"t          d	                    |                    |
                                }
|	dxx         d|
j        z   z  cc<   t          d	                    t          |                               |d                    |	                              d          5 }|                    |
                                           ddd           n# 1 swxY w Y   |
S )	zi
    Take a (text-based) file path and try to create another file next to it, this time using UTF-8.
    z2normalize is deprecated and will be removed in 3.0r   z;Unable to normalize "{}", no encoding charset seems to fit.-z{}r-   wbN)warningswarnDeprecationWarningr   r   listr   rQ   IOErrorrJ   rg   r<   r   r^   replacerW   writeoutput)r   r#   r$   r%   r&   r'   r(   rt   filenametarget_extensionsresultr   s               r4   	normalizer     s    M<  
  G ~~HXh//00
7||qIPP 
 
 	
 \\^^FaC&/11	CII%%h8I0J0JKKLLd
 
 "	
!!!" " " " " " " " " " " " " " "
 Ms   (EEE)r   r    r!   NNTF)r   r    r!   NNT)3rT   r   osr   os.pathr   r   typingr   r   r   r	   r
   cdr   r   r   r   constantr   r   r   r   mdr   modelsr   r   utilsr   r   r   r   r   r   r   	getLoggerrL   StreamHandlerrO   setFormatter	FormatterrH   rX   floatr^   boolr   r   r   r    r7   r4   <module>r      s           & & & & & & & & 5 5 5 5 5 5 5 5 5 5 5 5 5 5            R Q Q Q Q Q Q Q Q Q Q Q       0 0 0 0 0 0 0 0                  
	/	0	0'''))   GABB   (,(,!%   	
 49% 49%      H (,(,!%   	
 49% 49%      8 (,(,!%
 



 
 	

 49%
 49%
 
 
 
 
 
 
: (,(,!%. .
.. . 	.
 49%. 49%. . . . . . . .r7   