o
    tBh!                     @   s"  zd dl ZW n ey   d dlZY nw d dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ eed	d
edefddZeed	d
edefddZeed	d
edee fddZeed	d
edefddZ d
edefddZ!eed	d
edefddZ"eed	d
edefddZ#eed	d
edefddZ$eed	d
edefddZ%d
edefddZ&eed	d
edefd d!Z'eed	d
edefd"d#Z(eed	d
edefd$d%Z)eed	d
edefd&d'Z*eed	d
edefd(d)Z+ee,ed	d*edefd+d,Z-dId.e.d/e/dee fd0d1Z0ed2d	d3edefd4d5Z1d.e.de	ee e.f fd6d7Z2d8edefd9d:Z3dJd<ed=edefd>d?Z4d@edee fdAdBZ5dCedDede6fdEdFZ7dCedDedefdGdHZ8dS )K    N)IncrementalDecoder)findall)OptionalTupleUnionListSet)MultibyteIncrementalDecoder)aliases)	lru_cache)UNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDRE_POSSIBLE_ENCODING_INDICATIONENCODING_MARKSUTF8_MAXIMAL_ALLOCATIONIANA_SUPPORTED_SIMILAR)maxsize	characterreturnc                 C   sL   zt | }W n
 ty   Y dS w d|v p%d|v p%d|v p%d|v p%d|v S )NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXunicodedataname
ValueErrorr   description r   o/var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/charset_normalizer/utils.pyis_accentuated   s   (r   c                 C   s.   t | }|s	| S |d}tt|d dS )N r      )r   decompositionsplitchrint)r   
decomposedcodesr   r   r   remove_accent   s   

r&   c                 C   s.   t | }t D ]\}}||v r|  S qdS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   character_ord
range_name	ord_ranger   r   r   unicode_range,   s   r,   c                 C   *   z
t | }W d|v S  ty   Y dS w )NFLATINr   r   r   r   r   is_latin:   s   r/   c                 C   s&   z|  d W dS  ty   Y dS w )NasciiFT)encodeUnicodeEncodeErrorr   r   r   r   is_asciiC   s   r4   c                 C   s2   t | }d|v rdS t| }|d u rdS d|v S )NPTFPunctuationr   categoryr,   r   character_categorycharacter_ranger   r   r   is_punctuationJ   s   
r<   c                 C   s:   t | }d|v sd|v rdS t| }|d u rdS d|v S )NSNTFFormsr7   r9   r   r   r   	is_symbolY   s   
r@   c                 C   s&   |   s| dv r
dS t| }d|v S )N)u   ｜+,;<>TZ)isspacer   r8   r   r:   r   r   r   is_separatorh   s   
rI   c                 C   s   |   |  kS N)islowerisupperr3   r   r   r   is_case_variabler   s   rM   c                 C   s   t | }d|kS )NCo)r   r8   rH   r   r   r   is_private_use_onlyw   s   
rO   c                 C   r-   )NFCJKr   r   character_namer   r   r   is_cjk}      rS   c                 C   r-   )NFHIRAGANAr   rQ   r   r   r   is_hiragana   rT   rV   c                 C   r-   )NFKATAKANAr   rQ   r   r   r   is_katakana   rT   rX   c                 C   r-   )NFHANGULr   rQ   r   r   r   	is_hangul   rT   rZ   c                 C   r-   )NFTHAIr   rQ   r   r   r   is_thai   rT   r\   r*   c                 C   s   t D ]	}|| v r dS qdS )NTF)r   )r*   keywordr   r   r   is_unicode_range_secondary   s
   r^      sequencesearch_zonec                 C   s   t | tstt| }tt| d||kr|n| jddd}t|dkr'dS |D ]'}| dd}t	
 D ]\}}||krE|    S ||krO|    S q7q)dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nr0   ignoreerrorsr   -_)
isinstancebytes	TypeErrorlenr   r   decodelowerreplacer
   r(   )r`   ra   seq_lenresultsspecified_encodingencoding_aliasencoding_ianar   r   r   any_specified_encoding   s&   
 rs      r   c                 C   s    | dv pt td| jtS )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigencodings.{})
issubclass	importlibimport_moduleformatr   r	   )r   r   r   r   is_multi_byte_encoding   s   
r   c                 C   sJ   t D ] }t | }t|tr|g}|D ]}| |r!||f    S qqdS )z9
    Identify and extract SIG/BOM in given sequence.
    )N    )r   rg   rh   
startswith)r`   iana_encodingmarksmarkr   r   r   identify_sig_or_bom   s   

r   r   c                 C   s   | dvS )N>   rw   rx   r   )r   r   r   r   should_strip_sig_or_bom   s   r   Tcp_namestrictc                 C   sP   |   dd} t D ]\}}| |ks| |kr|  S q|r&td| | S )Nre   rf   z Unable to retrieve IANA for '{}')rl   rm   r
   r(   r   r   )r   r   rq   rr   r   r   r   	iana_name   s   r   decoded_sequencec                 C   s4   t  }| D ]}t|}|d u rq|| qt|S rJ   )setr,   addlist)r   rangesr   r;   r   r   r   
range_scan  s   r   iana_name_aiana_name_bc           	      C   s   t | st |r
dS td| j}td|j}|dd}|dd}d}tddD ]}t|g}||||krB|d7 }q-|d S )	Ng        r~   rb   rc   r            )r   r   r   r   r   rangerh   rk   )	r   r   	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodedr   r   r   cp_similarity  s   


r   c                 C   s   | t v o	|t |  v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   r   r   r   is_cp_similar(  s   r   )r_   )T)9unicodedata2r   ImportErrorcodecsr   rer   typingr   r   r   r   r   r   _multibytecodecr	   encodings.aliasesr
   	functoolsr   charset_normalizer.constantr   r   r   r   r   r   strboolr   r&   r,   r/   r4   r<   r@   rI   rM   rO   rS   rV   rX   rZ   r\   rj   r^   rh   r#   rs   r   r   r   r   r   floatr   r   r   r   r   r   <module>   sh     						
