a
    xdK                     @   s  d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlZddlZddlZddlZddlZddlZddlZddlZdd	 Zd#ddZG dd deZdd ZdZdZd$ddZd%ddZd&ddZd'ddZd(d d!Zed"kreej   dS ))z=Diagnostic functions, mainly for use when doing tech support.MIT    N)StringIO)
HTMLParser)BeautifulSoup__version__)builder_registryc                 C   sd  t dt  t dtj  g d}|D ]4}tjD ]}||jv r0 q&q0|| t d|  q&d|v r|d z*ddl	m
} t d	d
tt|j  W n, ty } zt d W Y d}~n
d}~0 0 d|v rzddl}t d|j  W n. ty } zt d W Y d}~n
d}~0 0 t| dr0|  } n| dsH| dr`t d|   t d dS zPtj| rt d|   t| }| } W d   n1 s0    Y  W n ty   Y n0 t d |D ]}t d|  d}	zt| |d}
d}	W n: ty4 } z t d|  t  W Y d}~n
d}~0 0 |	rTt d|  t |
  t d qdS )zDiagnostic suite for isolating common problems.

    :param data: A string containing markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %s)html.parserhtml5liblxmlz;I noticed that %s is not installed. Installing it may help.r
   zlxml-xmlr   etreezFound lxml version %s.z.lxml is not installed or couldn't be imported.Nr	   zFound html5lib version %sz2html5lib is not installed or couldn't be imported.readzhttp:zhttps:z<"%s" looks like a URL. Beautiful Soup is not an HTTP client.zpYou need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.z7"%s" looks like a filename. Reading data from the file. z#Trying to parse your markup with %sF)featuresT%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)printr   sysversionr   Zbuildersr   removeappendr
   r   joinmapstrZLXML_VERSIONImportErrorr	   hasattrr   
startswithospathexistsopen
ValueErrorr   	Exception	traceback	print_excZprettify)dataZbasic_parsersnameZbuilderr   er	   fpparsersuccesssoup r,   D/var/www/html/Ranjet/env/lib/python3.9/site-packages/bs4/diagnose.pydiagnose   sr    






,r.   Tc                 K   sJ   ddl m} |jt| fd|i|D ]\}}td||j|jf  q&dS )a  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   r   htmlz%s, %4s, %sN)r
   r   	iterparser   r   tagtext)r%   r/   kwargsr   eventelementr,   r,   r-   
lxml_trace]   s    "r6   c                   @   s`   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd ZdS )AnnouncingParserzSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    c                 C   s   t | d S )N)r   )selfsr,   r,   r-   _pu   s    zAnnouncingParser._pc                 C   s   |  d|  d S )Nz%s STARTr:   )r8   r&   attrsr,   r,   r-   handle_starttagx   s    z AnnouncingParser.handle_starttagc                 C   s   |  d|  d S )Nz%s ENDr;   r8   r&   r,   r,   r-   handle_endtag{   s    zAnnouncingParser.handle_endtagc                 C   s   |  d|  d S )Nz%s DATAr;   r8   r%   r,   r,   r-   handle_data~   s    zAnnouncingParser.handle_datac                 C   s   |  d|  d S )Nz
%s CHARREFr;   r>   r,   r,   r-   handle_charref   s    zAnnouncingParser.handle_charrefc                 C   s   |  d|  d S )Nz%s ENTITYREFr;   r>   r,   r,   r-   handle_entityref   s    z!AnnouncingParser.handle_entityrefc                 C   s   |  d|  d S )Nz
%s COMMENTr;   r@   r,   r,   r-   handle_comment   s    zAnnouncingParser.handle_commentc                 C   s   |  d|  d S )Nz%s DECLr;   r@   r,   r,   r-   handle_decl   s    zAnnouncingParser.handle_declc                 C   s   |  d|  d S )Nz%s UNKNOWN-DECLr;   r@   r,   r,   r-   unknown_decl   s    zAnnouncingParser.unknown_declc                 C   s   |  d|  d S )Nz%s PIr;   r@   r,   r,   r-   	handle_pi   s    zAnnouncingParser.handle_piN)__name__
__module____qualname____doc__r:   r=   r?   rA   rB   rC   rD   rE   rF   rG   r,   r,   r,   r-   r7   m   s   r7   c                 C   s   t  }||  dS )zPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)r7   feed)r%   r)   r,   r,   r-   htmlparser_trace   s    rM   ZaeiouZbcdfghjklmnpqrstvwxyz   c                 C   s:   d}t | D ](}|d dkr"t}nt}|t|7 }q|S )z#Generate a random word-like string.r      r   )range_consonants_vowelsrandomchoice)lengthr9   itr,   r,   r-   rword   s    rX      c                 C   s   d dd t| D S )z'Generate a random sentence-like string. c                 s   s   | ]}t td dV  qdS )rY   	   N)rX   rS   randint).0rV   r,   r,   r-   	<genexpr>       zrsentence.<locals>.<genexpr>)r   rP   )rU   r,   r,   r-   	rsentence   s    r`     c                 C   s   g d}g }t | D ]r}tdd}|dkrFt|}|d|  q|dkrf|ttdd q|dkrt|}|d|  qd	d
| d S )z+Randomly generate an invalid HTML document.)pdivspanrV   bscripttabler      z<%s>   rY   rO   z</%s>z<html>
z</html>)rP   rS   r\   rT   r   r`   r   )num_elementsZ	tag_nameselementsrV   rT   Ztag_namer,   r,   r-   rdoc   s    

rm   順 c           
      C   s&  t dt  t| }t dt|  dddgddfD ]|}d}z"t }t||}t }d}W n8 ty } z t d	|  t  W Y d
}~n
d
}~0 0 |r4t d||| f  q4ddl	m
} t }|| t }t d||   dd
l}	|	 }t }|| t }t d||   d
S )z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r
   r/   r	   r   FTr   Nz"BS4+%s parsed the markup in %.2fs.r   r   z$Raw lxml parsed the markup in %.2fs.z(Raw html5lib parsed the markup in %.2fs.)r   r   rm   lentimer   r"   r#   r$   r
   r   ZHTMLr	   r   parse)
rk   r%   r)   r*   ar+   re   r'   r   r	   r,   r,   r-   benchmark_parsers   s4    


rs   r
   c                 C   sX   t  }|j}t| }tt||d}td||| t	|}|
d |dd dS )z7Use Python's profiler on a randomly generated document.)bs4r%   r)   zbs4.BeautifulSoup(data, parser)Z
cumulativez_html5lib|bs42   N)tempfileNamedTemporaryFiler&   rm   dictrt   cProfileZrunctxpstatsZStatsZ
sort_statsZprint_stats)rk   r)   Z
filehandlefilenamer%   varsstatsr,   r,   r-   profile   s    

r~   __main__)T)rN   )rY   )ra   )rn   )rn   r
   )!rK   __license__ry   ior   html.parserr   rt   r   r   Zbs4.builderr   r   rz   rS   rv   rp   r#   r   r.   r6   r7   rM   rR   rQ   rX   r`   rm   rs   r~   rH   stdinr   r,   r,   r,   r-   <module>   s8   G
&



 

