U
    }gE                     @   st   d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZmZ G dd de	Z
G dd de	ZG dd	 d	e	ZdS )
    N)BeautifulSoup)EntitySubstitutionEncodingDetectorUnicodeDammitc                   @   sf   e Zd ZdZdd Zejdddddgd	d
 Zdd Z	dd Z
dd Zdd Zdd Zdd ZdS )TestUnicodeDammitz"Standalone tests of UnicodeDammit.c                 C   s   d}t |}|j|kstd S )Nu   I'm already Unicode! ☃)r   unicode_markupAssertionError)selfmarkupdammit r   9/tmp/pip-unpacked-wheel-kgiupv3k/bs4/tests/test_dammit.pytest_unicode_input   s    z$TestUnicodeDammit.test_unicode_inputz smart_quotes_to,expect_converted)Nu   ‘’“”)xmlz &#x2018;&#x2019;&#x201C;&#x201D;)htmlz&lsquo;&rsquo;&ldquo;&rdquo;)asciiz''""c                 C   s,   d}t |dg|dj}|d|ks(tdS )zbVerify the functionality of the smart_quotes_to argument
        to the UnicodeDammit constructor.s   <foo></foo>windows-1252)known_definite_encodingssmart_quotes_toz<foo>{}</foo>N)r   r   formatr   )r	   r   Zexpect_convertedr
   Z	convertedr   r   r   test_smart_quotes_to   s    z&TestUnicodeDammit.test_smart_quotes_toc                 C   s0   d}t |}|j dkst|jdks,td S )Ns   Sacré bleu! ☃utf-8u   Sacré bleu! ☃r   original_encodinglowerr   r   )r	   utf8r   r   r   r   test_detect_utf8*   s    z"TestUnicodeDammit.test_detect_utf8c                 C   s4   d}t |dg}|j dks"t|jdks0td S )N   
iso-8859-8u   םולשr   )r	   hebrewr   r   r   r   test_convert_hebrew0   s    z%TestUnicodeDammit.test_convert_hebrewc                 C   s6   d}t |}|j dkst|jd|ks2td S )Ns   ケータイ Watchr   )r   r   r   r   r   encode)r	   utf_8r   r   r   r   /test_dont_see_smart_quotes_where_there_are_none6   s    zATestUnicodeDammit.test_dont_see_smart_quotes_where_there_are_nonec                 C   s,   d d}t|dg}|j dks(td S )N   Räksmörgåsr   r   r!   r   r   r   r   r	   	utf8_datar   r   r   r    test_ignore_inappropriate_codecs<   s    
z2TestUnicodeDammit.test_ignore_inappropriate_codecsc                 C   s6   d d}dD ]"}t||g}|j dkstqd S )Nr$   r   )z.utf8z...z
utF---16.!r%   )r	   r'   Zbad_encodingr   r   r   r   test_ignore_invalid_codecsA   s    
z,TestUnicodeDammit.test_ignore_invalid_codecsc                 C   sL   d d}t|dgd}|j dks*tt|ddgd}|jd ksHtd S )Nr$   r   )Zexclude_encodingsr   r%   r&   r   r   r   test_exclude_encodingsG   s
    
z(TestUnicodeDammit.test_exclude_encodingsN)__name__
__module____qualname____doc__r   pytestmarkparametrizer   r   r    r#   r(   r)   r*   r   r   r   r   r      s"   	
r   c                   @   sT   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd ZdS )TestEncodingDetectorc                 C   s"   t d}t|j}d|kstd S )Ns'   <?xml version="1.0" encoding="UTF-" ?>u   utf-�)r   list	encodingsr   )r	   Zdetectedr4   r   r   r   Ptest_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterV   s    
zeTestEncodingDetector.test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterc                 C   s(   dD ]}t |dd}d|jkstqd S )N)s&   <html><meta charset="euc-jp" /></html>s&   <html><meta charset='euc-jp' /></html>s$   <html><meta charset=euc-jp /></html>s#   <html><meta charset=euc-jp/></html>Tis_htmlzeuc-jp)r   r   r   r	   datar   r   r   r    test_detect_html5_style_meta_tag]   s    z5TestEncodingDetector.test_detect_html5_style_meta_tagc              	   C   s   d}t jj}ttj zLdd }|t j_t|}d|jks@t	d|j
ksNt	t|d}|jsbt	W 5 ttj |t j_X d S )NsT   ﻿<?xml version="1.0" encoding="UTF-8"?>
<html><b>بتر</b>
<i>ѐ</i></html>c                 S   s   d S Nr   )strr   r   r   noop~   s    zETestEncodingDetector.test_last_ditch_entity_replacement.<locals>.noopTu   �zhtml.parser)bs4r   Z_chardet_dammitloggingdisableWARNINGNOTSETr   Zcontains_replacement_charactersr   r   r   )r	   docchardetr=   r   Zsoupr   r   r   "test_last_ditch_entity_replacementg   s    
z7TestEncodingDetector.test_last_ditch_entity_replacementc                 C   s,   d}t |}d|jkstd|jks(td S )N   < a >   < / a > u   <a>áé</a>utf-16le)r   r   r   r   r8   r   r   r   test_byte_order_mark_removed   s    z1TestEncodingDetector.test_byte_order_mark_removedc                 C   s   d}t |}t |dgd}d|jks(tt |dgd}d|jksDtdgdd |jD ks^td	}t |dgd
gd}d
|jkstdd
gdd |jD kstd S )NrF   zutf-16)r   r   )user_encodingsrG   c                 S   s   g | ]}|d  qS r   r   .0xr   r   r   
<listcomp>   s     zRTestEncodingDetector.test_known_definite_versus_user_encodings.<locals>.<listcomp>r   r   )r   rI   c                 S   s   g | ]}|d  qS rJ   r   rK   r   r   r   rN      s     )r   r   r   tried_encodings)r	   r9   r   beforeafterr   r   r   r   )test_known_definite_versus_user_encodings   s      z>TestEncodingDetector.test_known_definite_versus_user_encodingsc              	   C   s   d}t jdd}t|dgdgdgd}W 5 Q R X |\}|j}t|tsLt|jtksZtd|j	kshtdddgdd	 |j
D kstd S )
Nr   T)recordz	shift-jisr   r   )r   Zoverride_encodingsrI   c                 S   s   g | ]}|d  qS rJ   r   rK   r   r   r   rN      s     zKTestEncodingDetector.test_deprecated_override_encodings.<locals>.<listcomp>)warningscatch_warningsr   message
isinstanceDeprecationWarningr   filename__file__r   rO   )r	   r   wr   warningrV   r   r   r   "test_deprecated_override_encodings   s     z7TestEncodingDetector.test_deprecated_override_encodingsc              	   C   s`   d d}d d}|| | }tt |d W 5 Q R X t|}d|dks\td S )Nu	   ☃☃☃r   u   “Hi, I like Windows!”windows_1252u+   ☃☃☃“Hi, I like Windows!”☃☃☃)r!   r/   ZraisesUnicodeDecodeErrordecoder   	detwingler   )r	   r   r^   rC   Zfixedr   r   r   test_detwingle   s    

z#TestEncodingDetector.test_detwinglec                 C   s<   dD ]2}| d}|ds tt|}||kstqd S )N)u   œu   ₓu   ðr      )r!   endswithr   r   ra   )r	   Ztricky_unicode_charinputoutputr   r   r   +test_detwingle_ignores_multibyte_characters   s
    

z@TestEncodingDetector.test_detwingle_ignores_multibyte_charactersc                 C   s  d}| d}d}| d}tj}||ddd ks6td||ddksJtd||ddks^td||ksntd||ks~td	d
 }||| d kst||| d kst||| ddddkst||dddkst|d	| dddkst|d| ddd kstd S )Nz0<html><head><meta charset="utf-8"></head></html>r   z,<?xml version="1.0" encoding="ISO-8859-1" ?>Fr6   r   Tz
iso-8859-1    i  )r7   search_entire_document)ri      a)r!   r   Zfind_declared_encodingr   )r	   Zhtml_unicodeZ
html_bytesZxml_unicodeZ	xml_bytesmZspacerr   r   r   test_find_declared_encoding   s$    

z0TestEncodingDetector.test_find_declared_encodingN)r+   r,   r-   r5   r:   rE   rH   rR   r]   rb   rg   rl   r   r   r   r   r2   U   s   
% r2   c                   @   s   e Zd ZdZdd Zejdddgdd Zd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zejd!d"d#d$d%d&d'd(d)gd*d+ Zejd,d-d.d/d0gd1d2 Zejd3d4gd5d6 Zd7S )8TestEntitySubstitutionz1Standalone tests of the EntitySubstitution class.c                 C   s
   t | _d S r;   )r   subr	   r   r   r   setup_method  s    z#TestEntitySubstitution.setup_methodzoriginal,substituted)u   foo∀☃õbaru   foo&forall;☃&otilde;bar)u   ‘’foo“”z&lsquo;&rsquo;foo&ldquo;&rdquo;c                 C   s   | j ||kstd S r;   rn   substitute_htmlr   )r	   originalZsubstitutedr   r   r   test_substitute_html!  s    z+TestEntitySubstitution.test_substitute_htmlc                 C   s:   dD ]0\}}d}|| }|| }| j ||kstqd S )N)	)z&models;u   ⊧)z&Nfr;u   𝔑)z&ngeqq;u   ≧̸)z&not;   ¬)z&Not;u   ⫬z||)fjrv   )z&gt;>)z&lt;<z3 %s 4rq   )r	   entityutemplaterawZwith_entitiesr   r   r   test_html5_entity/  s
    z(TestEntitySubstitution.test_html5_entityc                 C   s<   d}d}| j ||kstd}d}| j ||ks8td S )Nu   fjords ⊔ penguinszfjords &sqcup; penguinsu   fjords ⊔︀ penguinszfjords &sqcups; penguinsrq   )r	   r9   r
   r   r   r   )test_html5_entity_with_variation_selectorI  s    z@TestEntitySubstitution.test_html5_entity_with_variation_selectorc                 C   s   d}| j |d|kstd S )NWelcome to "my bar"Frn   Zsubstitute_xmlr   r	   sr   r   r   Itest_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falseU  s    z`TestEntitySubstitution.test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falsec                 C   s0   | j dddkst| j dddks,td S )NZWelcomeTz	"Welcome"z	Bob's Barz"Bob's Bar"r   ro   r   r   r   6test_xml_attribute_quoting_normally_uses_double_quotesY  s    zMTestEntitySubstitution.test_xml_attribute_quoting_normally_uses_double_quotesc                 C   s   d}| j |ddkstd S )Nr   Tz'Welcome to "my bar"'r   r   r   r   r   Otest_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes]  s    zfTestEntitySubstitution.test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotesc                 C   s   d}| j |ddkstd S )NWelcome to "Bob's Bar"Tz""Welcome to &quot;Bob's Bar&quot;"r   r   r   r   r   btest_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotesc  s    zyTestEntitySubstitution.test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotesc                 C   s   d}| j ||kstd S )Nr   r   )r	   quotedr   r   r   <test_xml_quotes_arent_escaped_when_value_is_not_being_quotedi  s    zSTestEntitySubstitution.test_xml_quotes_arent_escaped_when_value_is_not_being_quotedc                 C   s   | j ddkstd S )Nzfoo<bar>zfoo&lt;bar&gt;r   ro   r   r   r   'test_xml_quoting_handles_angle_bracketsm  s    z>TestEntitySubstitution.test_xml_quoting_handles_angle_bracketsc                 C   s   | j ddkstd S )NzAT&TzAT&amp;Tr   ro   r   r   r   #test_xml_quoting_handles_ampersandsp  s    z:TestEntitySubstitution.test_xml_quoting_handles_ampersandsc                 C   s   | j ddkstd S )N&Aacute;T&Tz&amp;Aacute;T&amp;Tr   ro   r   r   r   Etest_xml_quoting_including_ampersands_when_they_are_part_of_an_entitys  s    z\TestEntitySubstitution.test_xml_quoting_including_ampersands_when_they_are_part_of_an_entityc                 C   s   | j ddkstd S )Nr   z&Aacute;T&amp;T)rn   Z"substitute_xml_containing_entitiesr   ro   r   r   r   Dtest_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityv  s    
z[TestEntitySubstitution.test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityc                 C   s   d}| j ||kstdS )z:There's no need to do this except inside attribute values.zBob's "bar"Nrq   )r	   textr   r   r    test_quotes_not_html_substituted|  s    z7TestEntitySubstitution.test_quotes_not_html_substitutedzmarkup, old)z	foo & barzfoo &amp; bar)zfoo&zfoo&amp;)z
foo&&& barzfoo&amp;&amp;&amp; bar)zx=1&y=2zx=1&amp;y=2)z&123z&amp;123)z&abcz&amp;abc)z
foo &0 barzfoo &amp;0 bar)zfoo &lolwat barzfoo &amp;lolwat barc                 C   s,   | j ||kst| j ||ks(td S r;   rn   rr   r   substitute_html5_raw)r	   r
   oldr   r   r   'test_unambiguous_ampersands_not_escaped  s    z>TestEntitySubstitution.test_unambiguous_ampersands_not_escapedzmarkup,html,html5,html5raw)&divide;&amp;divide;r   r   )z
&nonesuch;&amp;nonesuch;r   r   )z&#247;
&amp;#247;r   r   )z&#xa1;
&amp;#xa1;r   r   c                 C   s@   | j ||kst| j ||ks(t| j ||ks<td S r;   )rn   rr   r   Zsubstitute_html5r   )r	   r
   r   html5Zhtml5rawr   r   r   'test_when_entity_ampersands_are_escaped  s    z>TestEntitySubstitution.test_when_entity_ampersands_are_escapedzmarkup,expect)z&nosuchentity;z&amp;nosuchentity;c                 C   s,   | j ||kst| j ||ks(td S r;   r   )r	   r
   expectr   r   r   !test_ambiguous_ampersands_escaped  s    z8TestEntitySubstitution.test_ambiguous_ampersands_escapedN)r+   r,   r-   r.   rp   r/   r0   r1   rt   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rm     s`   

	
 rm   )r/   r?   rT   r>   r   Z
bs4.dammitr   r   r   objectr   r2   rm   r   r   r   r   <module>   s   G G