U
    }g                     @   s  U d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZ er~dd	lmZ ddlZddlZddlZddlZddlZddlZd
ddddZd-d
eeddddZG dd deZeddddZ dZ!ee"d< dZ#ee"d< d.e$edddZ%d/e$eddd Z&d0e$ed"d#d$Z'd1e$dd"d&d'Z(d2e$edd)d*d+Z)e*d,kreej+,  dS )3z=Diagnostic functions, mainly for use when doing tech support.MIT    N)BytesIO)
HTMLParser)BeautifulSoup__version__)builder_registry)AnyIOListOptionalTupleTYPE_CHECKING)_IncomingMarkupr   datareturnc           	   	   C   s  t dt  t dtj  dddg}|D ]4}tjD ]}||jkr2 q(q2|| t d|  q(d|kr|d z*dd	l	m
} t d
dtt|j  W n tk
r   t d Y nX d|krzddl}t d|j  W n tk
r   t d Y nX t| dr|  } |D ]|}t d|  d}zt| |d}d}W n* tk
rb   t d|  t  Y nX |rt d|  t |  t d qdS )zDiagnostic suite for isolating common problems.

    :param data: Some markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %shtml.parserhtml5liblxmlz;I noticed that %s is not installed. Installing it may help.zlxml-xmlr   etreezFound lxml version %s.z.lxml is not installed or couldn't be imported.NzFound html5lib version %sz2html5lib is not installed or couldn't be imported.readz#Trying to parse your markup with %sF)featuresT%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)printr   sysversionr   Zbuildersr   removeappendr   r   joinmapstrZLXML_VERSIONImportErrorr   hasattrr   r   	Exception	traceback	print_excZprettify)	r   Zbasic_parsersnameZbuilderr   r   parsersuccessZsoup r+   0/tmp/pip-unpacked-wheel-kgiupv3k/bs4/diagnose.pydiagnose    sN    




r-   T)r   htmlkwargsr   c                 K   sz   ddl m} |dd}t| tr,| d} t| ts>t| }|j|f||d|D ]\}}t	d||j
|jf  qVdS )	a  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   r   recoverTutf8)r.   r0   z%s, %4s, %sN)r   r   pop
isinstancer"   encoder	   r   	iterparser   tagtext)r   r.   r/   r   r0   readereventelementr+   r+   r,   
lxml_traceX   s    


 r;   c                   @   s   e Zd ZdZeddddZdeeeeee f  e	dddd	Z
dee	dd
ddZeddddZeddddZeddddZeddddZeddddZeddddZeddddZdS )AnnouncingParserzSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    N)sr   c                 C   s   t | d S )N)r   )selfr=   r+   r+   r,   _pw   s    zAnnouncingParser._pT)r(   attrshandle_empty_elementr   c                 C   s   |  | d| d d S )N z STARTr?   )r>   r(   r@   rA   r+   r+   r,   handle_starttagz   s    z AnnouncingParser.handle_starttag)r(   check_already_closedr   c                 C   s   |  d|  d S )Nz%s ENDrC   )r>   r(   rE   r+   r+   r,   handle_endtag   s    zAnnouncingParser.handle_endtagr   c                 C   s   |  d|  d S )Nz%s DATArC   r>   r   r+   r+   r,   handle_data   s    zAnnouncingParser.handle_data)r(   r   c                 C   s   |  d|  d S )Nz
%s CHARREFrC   r>   r(   r+   r+   r,   handle_charref   s    zAnnouncingParser.handle_charrefc                 C   s   |  d|  d S )Nz%s ENTITYREFrC   rI   r+   r+   r,   handle_entityref   s    z!AnnouncingParser.handle_entityrefc                 C   s   |  d|  d S )Nz
%s COMMENTrC   rG   r+   r+   r,   handle_comment   s    zAnnouncingParser.handle_commentc                 C   s   |  d|  d S )Nz%s DECLrC   rG   r+   r+   r,   handle_decl   s    zAnnouncingParser.handle_declc                 C   s   |  d|  d S )Nz%s UNKNOWN-DECLrC   rG   r+   r+   r,   unknown_decl   s    zAnnouncingParser.unknown_declc                 C   s   |  d|  d S )Nz%s PIrC   rG   r+   r+   r,   	handle_pi   s    zAnnouncingParser.handle_pi)T)T)__name__
__module____qualname____doc__r"   r?   r
   r   r   boolrD   rF   rH   rJ   rK   rL   rM   rN   rO   r+   r+   r+   r,   r<   o   s"    r<   c                 C   s   t  }||  dS )zPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)r<   feed)r   r)   r+   r+   r,   htmlparser_trace   s    rV   Zaeiou_vowelsZbcdfghjklmnpqrstvwxyz_consonants   )lengthr   c                 C   s:   d}t | D ](}|d dkr"t}nt}|t|7 }q|S )z<Generate a random word-like string.

    :meta private:
        r   )rangerX   rW   randomchoice)rZ   r=   itr+   r+   r,   rword   s    rb      c                 C   s   d dd t| D S )z@Generate a random sentence-like string.

    :meta private:
    rB   c                 s   s   | ]}t td dV  qdS )rc   	   N)rb   r^   randint).0r`   r+   r+   r,   	<genexpr>   s     zrsentence.<locals>.<genexpr>)r    r]   )rZ   r+   r+   r,   	rsentence   s    rh     )num_elementsr   c                 C   s   dddddddg}g }t | D ]r}tdd	}|dkrPt|}|d
|  q|dkrp|ttdd q|dkrt|}|d|  qdd| d S )zDRandomly generate an invalid HTML document.

    :meta private:
    pdivspanr`   bscripttabler      z<%s>   rc   r\   z</%s>z<html>
z</html>)r]   r^   re   r_   r   rh   r    )rj   Z	tag_nameselementsr`   r_   Ztag_namer+   r+   r,   rdoc   s    

ru   順 c           	   	   C   s  t dt  t| }t dt|  dddgddfD ]l}d}z"t }t|| t }d}W n( tk
r   t d	|  t  Y nX |r4t d
||| f  q4ddl	m
} t }|| t }t d||   ddl}| }t }|| t }t d||   dS )z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r   r.   r   r   FTr   z"BS4+%s parsed the markup in %.2fs.r   r   z$Raw lxml parsed the markup in %.2fs.Nz(Raw html5lib parsed the markup in %.2fs.)r   r   ru   lentimer   r%   r&   r'   r   r   ZHTMLr   r   parse)	rj   r   parser_namer*   arn   r   r   r)   r+   r+   r,   benchmark_parsers   s4    


r|   r   )rj   r)   r   c                 C   sX   t  }|j}t| }tt||d}td||| t	|}|
d |dd dS )z7Use Python's profiler on a randomly generated document.)bs4r   r)   zbs4.BeautifulSoup(data, parser)Z
cumulativez_html5lib|bs42   N)tempfileNamedTemporaryFiler(   ru   dictr}   cProfileZrunctxpstatsZStatsZ
sort_statsZprint_stats)rj   r)   Z
filehandlefilenamer   varsstatsr+   r+   r,   profile   s    

r   __main__)T)rY   )rc   )ri   )rv   )rv   r   )-rS   __license__r   ior   html.parserr   r}   r   r   Zbs4.builderr   typingr   r	   r
   r   r   r   Zbs4._typingr   r   r^   r   rx   r&   r   r-   rT   r;   r<   r"   rV   rW   __annotations__rX   intrb   rh   ru   r|   r   rP   stdinr   r+   r+   r+   r,   <module>   s:    	8,#
