a
    xd                     @   s   d dl Z d dlZd dlmZ d dlZd dlmZ d dlm	Z	 d dl
Z
eejeje h dZdd Zdd	 Zd
d Zdd Zedkre  dS )    N)OrderedDict)get_raw_data)Selector>	   ZvoZprgZtkzvai-VaiiZndszvai-LatnZvaiZcuZkkjc                  C   s   d} t | }dd |D }|d i }|D ]4}g ||< |D ]"}t|d |r>|| | q>q.tD ]}||v rh||= qh|S )Nz!../raw_data/cldr_dates_full/main/c                 S   s   g | ]}t d |s|qS )-[A-Z0-9]+$)research.0	shortname r   Z/var/www/html/Ranjet/env/lib/python3.9/site-packages/dateparser_scripts/order_languages.py
<listcomp>   s   z-_get_language_locale_dict.<locals>.<listcomp>rootr   )oslistdirremover   matchappendavoid_languages)Zcldr_dates_full_dirZavailable_locale_namesZavailable_language_nameslanguage_locale_dictZlanguage_nameZlocale_namelanguager   r   r   _get_language_locale_dict   s    

r   c              	      s  dd }d}t |}t|}W d    n1 s40    Y  |d d }i |D ]x}t|| d }zP|| d }|D ]:}	t||	 d | }
|	v r|	  |
7  < qx|
|	< qxW qR ty   Y qR0 qR| }|t fd	d
dd ttfdd
d}t	dt
|D ]}tdd|| ||< q|  }d}dd t|D }t|t|  fdd|D }t t| }g }|D ]@}	tdd|	}||v r|||d |	 n
||	 q|t| }ttt|}|S )Nc               
   S   s   g d} t d}t|jd}|jrz@|d }|s@tddd |D }|d d	krbtd
W q ty } z t	| t	d | W  Y d }~S d }~0 0 n| S |S )N)&enruesdetrfrfajazhviitnlptarplidkoukthhecssvroeldahufisrskbgnbhrltnohislcaetz:https://w3techs.com/technologies/overview/content_language)textz//table[@class='bars']//a/@hrefzNo bars foundc                 S   s   g | ]}| d ddqS )z+https://w3techs.com/technologies/details/cl -)replacestrip)r	   ir   r   r   r   V       zH_get_language_order.<locals>.get_most_common_locales.<locals>.<listcomp>r   r   z!English is not the first languagez6The website could have changed, please update the code)
requestsgetr   r>   okZxpathZgetall
ValueError	Exceptionprint)Zold_common_localesresponseselZbarsZnew_most_common_localeser   r   r   get_most_common_locales%   s"    '
z4_get_language_order.<locals>.get_most_common_localesz5../raw_data/cldr_core/supplemental/territoryInfo.jsonZsupplementalZterritoryInfoZ_populationZlanguagePopulationZ_populationPercentc                    s    |  | fS Nr   x)language_population_dictr   r   <lambda>y   rD   z%_get_language_order.<locals>.<lambda>T)keyreversec                    s
     | S rO   )indexrP   )language_order_with_duplicatesr   r   rS   ~   rD   )rT   r   _r@   zD../dateparser_data/supplementary_language_data/date_translation_datac                 S   s   g | ]}|d d qS )Nr   )r	   rQ   r   r   r   r      rD   z'_get_language_order.<locals>.<listcomp>c                    s   g | ]}| v r|qS r   r   r   )available_languagesr   r   r      rD   z-\w+r?      )openjsonloadintfloatrI   sortedkeyssetrangelenr   subr   r   unioninsertrV   r   listmapstr)r   rN   Zterritory_info_filefZterritory_contentZterritory_info_data	territory
populationZ	lang_dictr   Zlanguage_populationZmost_common_localeslanguage_orderrV   Zcldr_languagesZsupplementary_date_directoryZsupplementary_languagesZabsent_languagesZremaining_languagesZparent_languager   )rZ   rW   rR   r   _get_language_order$   s\    <
(

rp   c                 C   sB   i }t | D ]0}d|vr$|g||< q||dd  | q|S )Nr@   r   )ra   splitr   )ro   datalangr   r   r   generate_language_map   s    rt   c                  C   s   t   t } t| }d}d}tj|s2t| dtj|ddd }t	 }|D ]*}|| 
 v rrt| | ||< qPg ||< qPdtj|ddd }t|}dtj|ddd }	|d	 |	 d	 | d
 }
t|d}||
 W d    n1 s0    Y  d S )Nz../dateparser/data/z$../dateparser/data/languages_info.pyzlanguage_order = ),z:    )
separatorsindentzlanguage_locale_dict = zlanguage_map = z


w)r   r   rp   r   pathisdirmkdirr]   dumpsr   rb   ra   rt   r\   write)r   ro   Zparent_directoryfilenameZlanguage_order_stringZcomplete_language_locale_dictrT   Zlanguage_locale_dict_stringZlanguage_map_dataZlanguage_map_data_stringZlanguages_info_stringrl   r   r   r   main   s2    

r   __main__)r]   r   collectionsr   regexr   Zdateparser_scripts.utilsr   Zparselr   rE   chdirr{   dirnameabspath__file__r   r   rp   rt   r   __name__r   r   r   r   <module>   s   r
!