U
    }g>Y                     @   sJ  d Z dgZddlmZmZmZmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZmZmZ ddlZddlmZmZmZmZmZ ddlmZmZmZ ddl Z dd	l!m"Z" dd
lm#Z#m$Z$m%Z%m&Z& e	rddl'm(Z( ddl)m*Z+ G dd deZ,G dd de+j-Z.G dd de/Z0G dd de+j1Z2G dd de2Z3G dd de2Z4dS )MITHTML5TreeBuilder    )	AnycastDictIterableOptionalSequenceTYPE_CHECKINGTupleUnion)	TypeAlias)_AttributeValue_AttributeValues	_Encoding
_Encodings_NamespaceURL
_RawMarkupN)DetectsXMLParsedAsHTML
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributePageElementnonwhitespace_re)
namespaces)CommentDoctypeNavigableStringTagBeautifulSoup)basec                   @   s   e Zd ZU dZdZeed< eeee	gZ
ee ed< dZeed< ded< ee ed	< deee ee ee eeeee ee ef  dddZed
dddZeddddZeedddZd
S )r   aj  Use `html5lib <https://github.com/html5lib/html5lib-python>`_ to
    build a tree.

    Note that `HTML5TreeBuilder` does not support some common HTML
    `TreeBuilder` features. Some of these features could theoretically
    be implemented, but at the very least it's quite difficult,
    because html5lib moves the parse tree around as it's being built.

    Specifically:

    * This `TreeBuilder` doesn't use different subclasses of
      `NavigableString` (e.g. `Script`) based on the name of the tag
      in which the string was found.
    * You can't use a `SoupStrainer` to parse only part of a document.
    html5libNAMEfeaturesTTRACKS_LINE_NUMBERSTreeBuilderForHtml5libunderlying_builderuser_specified_encodingN)markupr*   document_declared_encodingexclude_encodingsreturnc                 c   s`   || _ |df|dffD ](\}}|rtjd| d| ddd qtj|dd |d d dfV  d S )	Nr,   r-   zYou provided a value for z0, but the html5lib tree builder doesn't support .   
stacklevelF)r*   warningswarnr   Zwarn_if_markup_looks_like_xml)selfr+   r*   r,   r-   variablename r8   9/tmp/pip-unpacked-wheel-kgiupv3k/bs4/builder/_html5lib.pyprepare_markupW   s    zHTML5TreeBuilder.prepare_markup)r+   r.   c                 C   s   | j dk	r$| j jdk	r$tjddd tj| jd}| jdk	s@t|| j_	t
 }t|tsb| j|d< |j|f|}t|trd|_n|jjjd }|j}||_d| j_	dS )zRun some incoming markup through some parsing process,
        populating the `BeautifulSoup` object in `HTML5TreeBuilder.soup`.
        NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.   r1   )treeoverride_encodingr   )soupZ
parse_onlyr3   r4   r$   
HTMLParsercreate_treebuilderr)   AssertionErrorparserdict
isinstancestrr*   parseoriginal_encoding	tokenizerstreamcharEncodingr7   )r5   r+   rB   extra_kwargsdocrG   r8   r8   r9   feedu   s$    


zHTML5TreeBuilder.feed)namespaceHTMLElementsr.   c                 C   s   t || j| jd| _| jS )zCalled by html5lib to instantiate the kind of class it
        calls a 'TreeBuilder'.

        :param namespaceHTMLElements: Whether or not to namespace HTML elements.

        :meta private:
        )store_line_numbers)r(   r>   rO   r)   )r5   rN   r8   r8   r9   r@      s    
  z#HTML5TreeBuilder.create_treebuilder)fragmentr.   c                 C   s   d| S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html>r8   )r5   rP   r8   r8   r9   test_fragment_to_document   s    z*HTML5TreeBuilder.test_fragment_to_document)NNN)__name__
__module____qualname____doc__r%   rE   __annotations__r   r   r   r&   r	   r'   boolr   r   r   r   r   r   r:   rM   r@   rQ   r8   r8   r8   r9   r   ;   s*   
   &c                       s   e Zd ZU ded< eej ed< d#eed eed fddZ	d	d
ddZ
eeef ddddZeed	dddZeddddZd	d
ddZd	d
ddZd	ddddZdd
ddZd	ed d!d"Z  ZS )$r(   r"   r>   rB   NT)rN   r>   rO   kwargsc                    s\   |r|| _ n0tjdtdd ddlm} |d	d|i|| _ tt| | d | _	|| _
d S )
NaK  The optionality of the 'soup' argument to the TreeBuilderForHtml5lib constructor is deprecated as of Beautiful Soup 4.13.0: 'soup' is now required. If you can't pass in a BeautifulSoup object here, or you get this warning and it seems mysterious to you, please contact the Beautiful Soup developer team for possible un-deprecation.   r1   r   r!    html.parserrO   )rZ   r[   )r>   r3   r4   DeprecationWarningbs4r"   superr(   __init__rB   rO   )r5   rN   r>   rO   rX   r"   	__class__r8   r9   r_      s&       zTreeBuilderForHtml5lib.__init__Elementr.   c                 C   s   | j   t| j | j d S N)r>   resetrb   r5   r8   r8   r9   documentClass   s    
z$TreeBuilderForHtml5lib.documentClass)tokenr.   c                 C   sP   t t|d }t tt |d }t tt |d }t|||}| j| d S )Nr7   publicIdsystemId)r   rE   r   r   Zfor_name_and_idsr>   object_was_parsed)r5   rh   r7   ri   rj   doctyper8   r8   r9   insertDoctype   s
    z$TreeBuilderForHtml5lib.insertDoctype)r7   	namespacer.   c                 C   s`   d }d }| j d k	r>| jr>| j jj \}}|d k	s6t|d }| jj||||d}t|| j|S )N   )
sourceline	sourcepos)	rB   rO   rH   rI   positionrA   r>   new_tagrb   )r5   r7   rn   rp   rq   tagr8   r8   r9   elementClass   s       z#TreeBuilderForHtml5lib.elementClassTextNode)datar.   c                 C   s   t t|| jS rd   )rv   r   r>   )r5   rw   r8   r8   r9   commentClass   s    z#TreeBuilderForHtml5lib.commentClassc                 C   s
   t  dS )zThis is only used by html5lib HTMLParser.parseFragment(),
        which is never used by Beautiful Soup, only by the html5lib
        unit tests. Since we don't currently hook into those tests,
        the implementation is left blank.
        NNotImplementedErrorrf   r8   r8   r9   fragmentClass   s    z$TreeBuilderForHtml5lib.fragmentClassc                 C   s
   t  dS zThis is only used by the html5lib unit tests. Since we
        don't currently hook into those tests, the implementation is
        left blank.
        Nry   rf   r8   r8   r9   getFragment   s    z"TreeBuilderForHtml5lib.getFragmentnoder.   c                 C   s   | j |j d S rd   )r>   appendelementr5   r   r8   r8   r9   appendChild  s    z"TreeBuilderForHtml5lib.appendChildc                 C   s   | j S rd   )r>   rf   r8   r8   r9   getDocument
  s    z"TreeBuilderForHtml5lib.getDocument)r   r.   c                 C   s
   t  dS r|   ry   r5   r   r8   r8   r9   testSerializer  s    z%TreeBuilderForHtml5lib.testSerializer)NT)rR   rS   rT   rV   r   r$   r?   rW   r   r_   rg   r   rE   rm   ru   rx   r{   r}   r   r   r   __classcell__r8   r8   r`   r9   r(      s&   
  !	r(   c                   @   s   e Zd ZU dZeed< eed< edddZee	e
ef  ddd	Ze
ed
dddZee	e
ef  dddZee
 dddZedddZe
edddZe
edddZd
S )AttrListz@Represents a Tag's attributes in a way compatible with html5lib.r   attrs)r   c                 C   s   || _ t| j j| _d S rd   )r   rC   r   r   r8   r8   r9   r_     s    zAttrList.__init__rc   c                 C   s   t | j  S rd   )listr   items__iter__rf   r8   r8   r9   r     s    zAttrList.__iter__N)r7   valuer.   c                 C   st   | j jp
i }||dg ks<| j j|krf||| j jg krft|tsft|tsTt| j t	
|}|| j |< d S )N*)r   Zcdata_list_attributesgetr7   rD   r   rE   rA   Zattribute_value_list_classr   findall)r5   r7   r   Z	list_attrr8   r8   r9   __setitem__"  s    

zAttrList.__setitem__c                 C   s   t | j S rd   )r   r   r   rf   r8   r8   r9   r   3  s    zAttrList.itemsc                 C   s   t | j S rd   r   r   keysrf   r8   r8   r9   r   6  s    zAttrList.keysc                 C   s
   t | jS rd   )lenr   rf   r8   r8   r9   __len__9  s    zAttrList.__len__)r7   r.   c                 C   s
   | j | S rd   )r   r5   r7   r8   r8   r9   __getitem__<  s    zAttrList.__getitem__c                 C   s   |t | j kS rd   r   r   r8   r8   r9   __contains__?  s    zAttrList.__contains__)rR   rS   rT   rU   r    rV   r   r_   r   r   rE   r   r   r   r   r   intr   r   rW   r   r8   r8   r8   r9   r     s   
r   c                   @   sL   e Zd ZU eed< ded< ee ed< eedddZ	e
jddd	Zd
S )BeautifulSoupNoder   r"   r>   rn   rc   c                 C   s
   t  dS )zReturn the html5lib constant corresponding to the type of
        the underlying DOM object.

        NOTE: This property is only accessed by the html5lib test
        suite, not by Beautiful Soup proper.
        Nry   rf   r8   r8   r9   nodeTypeH  s    zBeautifulSoupNode.nodeTypec                 C   s
   t  d S rd   ry   rf   r8   r8   r9   	cloneNodeT  s    zBeautifulSoupNode.cloneNodeN)rR   rS   rT   r   rV   r   r   propertyr   r   treebuilder_baseNoder   r8   r8   r8   r9   r   C  s   
r   c                   @   s.  e Zd ZU eed< ee ed< edee dddZddd	d
dZe	dddZ
eeeeef f Zeed< eeef Zeed< ee ddddZee
eZd%eed ddddZddddddZd dd	ddZd ddddZeddd Zejdd!d"Zeee ef dd#d$ZeeZdS )&rb   r   rn   r"   )r   r>   rn   c                 C   s&   t j| |j || _|| _|| _d S rd   )r   r   r_   r7   r   r>   rn   )r5   r   r>   rn   r8   r8   r9   r_   \  s    zElement.__init__r   Nr~   c                 C   s  d }t |jtkr|j }}n|j}| |_|d k	rP|jd k	rPt|tsP|j  |d k	r| jjrt | jjd tkr| jjd }| j	|| }|
| || j_n^t|tr| j	|}| jjr| jd}n| jjd k	r| j }n| j}| jj|| j|d d S )NF)parentmost_recent_element)typer   r   r   rD   rE   extractcontentsr>   
new_stringreplace_withZ_most_recent_element_last_descendantnext_elementrk   )r5   r   Zstring_childchildZold_elementZnew_elementr   r8   r8   r9   r   d  sD    



  zElement.appendChildrc   c                 C   s   t | jtri S t| jS rd   )rD   r   r   r   rf   r8   r8   r9   getAttributes  s    zElement.getAttributes_Html5libAttributeName_Html5libAttributes)
attributesr.   c                 C   s   |d k	rt |dkrt| D ](\}}t|tr t| }||= |||< q tt|}| jj	
| j| t| D ]\}}|| j|< qr| jj	| j d S Nr   )r   r   r   rD   tupler   r   r   r>   ZbuilderZ$_replace_cdata_list_attribute_valuesr7   r   Zset_up_substitutions)r5   r   r7   r   new_nameZnormalized_attributesZvalue_or_valuesr8   r8   r9   setAttributes  s    


 zElement.setAttributes)rw   insertBeforer.   c                 C   s4   t | j|| j}|r&| || n
| | d S rd   )rv   r>   r   r   r   )r5   rw   r   textr8   r8   r9   
insertText  s    zElement.insertText)r   refNoder.   c                 C   s   | j |j }t|j tkrz| j jrzt| j j|d  tkrz| j j|d  }t|tks\t| j||j  }|| n| j 	||j  | |_
d S )Nro   )r   indexr   r   r   rA   r>   r   r   insertr   )r5   r   r   r   Zold_nodeZnew_strr8   r8   r9   r     s    zElement.insertBeforec                 C   s   |j   d S rd   )r   r   r   r8   r8   r9   removeChild  s    zElement.removeChild)
new_parentr.   c                 C   s  | j }|j }|j}|dd}t|jdkrJ|dk	s8t|jd }|j}n
d}|j}|j}t|dkr|d }	|dk	r~||	_n||	_||	_|dk	r|	|_n|	|_|dk	r|	|_|d jddd}
|
dk	st||
_|dk	r|
|_d|
_|D ]}||_	|j
| qg |_||_dS )z1Move all of this tag's children into another tag.Fr   Nr   T)Zis_initializedZaccept_self)r   Znext_siblingr   r   r   rA   r   Zprevious_elementZprevious_siblingr   r   )r5   r   r   Znew_parent_elementZfinal_next_elementZnew_parents_last_descendantZnew_parents_last_childZ(new_parents_last_descendant_next_elementZ	to_appendZfirst_childZlast_childs_last_descendantr   r8   r8   r9   reparentChildren  sN    
 zElement.reparentChildrenc                 C   s   t | jjdkS r   )r   r   r   rf   r8   r8   r9   
hasContent7  s    zElement.hasContentc                 C   sB   | j | jj| j}t|| j | j}| jD ]\}}||j|< q*|S rd   )r>   rs   r   r7   rn   rb   r   )r5   rt   r   keyr   r8   r8   r9   r   <  s
    zElement.cloneNodec                 C   s(   | j d krtd | jfS | j | jfS d S )Nhtml)rn   r   r7   rf   r8   r8   r9   getNameTupleC  s    
zElement.getNameTuple)N) rR   rS   rT   r    rV   r   r   r_   r   r   r   r   rE   r   r   r   r   r   r   r   r   r   r   r   r   rW   r   r   r   r   r   	nameTupler8   r8   r8   r9   rb   X  s8   
  3 
  
 Trb   c                   @   s&   e Zd ZU eed< eddddZdS )rv   r   r"   )r   r>   c                 C   s   t j| d  || _|| _d S rd   )r   r   r_   r   r>   )r5   r   r>   r8   r8   r9   r_   O  s    zTextNode.__init__N)rR   rS   rT   r   rV   r_   r8   r8   r8   r9   rv   L  s   
rv   )5__license____all__typingr   r   r   r   r   r	   r
   r   r   Ztyping_extensionsr   Zbs4._typingr   r   r   r   r   r   r3   Zbs4.builderr   r   r   r   r   Zbs4.elementr   r   r   r$   Zhtml5lib.constantsr   r   r   r   r    r]   r"   Zhtml5lib.treebuildersr#   r   r   TreeBuilderr(   objectr   r   r   rb   rv   r8   r8   r8   r9   <module>   s*   , 	sg. u