U
    grH                  
   @  s  d Z ddlmZ ddlZddlZddlZddlZddlmZ dZ	dZ
dZed	Zd
dd eddD Zd
dd eddD Zd
dd eddD Zd
dd eddD Zd
dd eddD Zd
dd eddD ZedjddddddZedjddddZededededededededd Zed!Zed"ZG d#d$ d$ejZdBd%d&d$d'd(d)Zd%d%d*d+d,Zd-d%d.d/d0d1Z d%d2d2d3d4d5d6Z!d%d%d7d8d9Z"d%d%d:d;d<Z#d%d%d%d=d>d?Z$d%d%d%d=d@dAZ%dS )Ca  
An implementation of `urlparse` that provides URL validation and normalization
as described by RFC3986.

We rely on this implementation rather than the one in Python's stdlib, because:

* It provides more complete URL validation.
* It properly differentiates between an empty querystring and an absent querystring,
  to distinguish URLs with a trailing '?'.
* It handles scheme, hostname, port, and path normalization.
* It supports IDNA hostnames, normalizing them to their encoded form.
* The API supports passing individual components, as well as the complete URL string.

Previously we relied on the excellent `rfc3986` package to handle URL parsing and
validation, but this module provides a simpler alternative, with less indirection
required.
    )annotationsN   )
InvalidURLi   zBABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~z!$&'()*+,;=z%[A-Fa-f0-9]{2} c                 C  s   g | ]}|d krt |qS ))    "   <   >   `   chr.0i r   3/tmp/pip-unpacked-wheel-ql4n0x43/httpx/_urlparse.py
<listcomp>,   s      r   r      c                 C  s   g | ]}|d krt |qS ))r   r   #   r   r	   r   r   r   r   r   r   2   s      c                 C  s   g | ]}|d krt |qS ))	r   r   r   r   r	   ?   r
   {   }   r   r   r   r   r   r   8   s   c                 C  s   g | ]}|d krt |qS )r   r   r   r   r	   r   r
   r   r   /   :   ;   =   @   [   \   ]   ^   |   r   r   r   r   r   r   C   s
   c                 C  s   g | ]}|d krt |qS r   r   r   r   r   r   r   M   s
   c                 C  s   g | ]}|d krt |qS ))r   r   r   r   r	   r   r
   r   r   r   r   r   r   r   r   r    r!   r"   r   r   r   r   r   r   Z   s
   z(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?(?P<path>{path})(?:\?(?P<query>{query}))?(?:#(?P<fragment>{fragment}))?z([a-zA-Z][a-zA-Z0-9+.-]*)?z[^/?#]*z[^?#]*z[^#]*z.*scheme	authoritypathqueryfragmentzA(?:(?P<userinfo>{userinfo})@)?(?P<host>{host}):?(?P<port>{port})?z(\[.*\]|[^:@]*))userinfohostportz[^@]*z(\[.*\]|[^:]*))r$   r%   r&   r'   r(   r)   r*   r+   z ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$z^\[.*\]$c                   @  s   e Zd ZU ded< ded< ded< ded< ded< ded	< ded
< eddddZeddddZdd dddZddddZdS )ParseResultstrr$   r)   r*   
int | Noner+   r&   
str | Noner'   r(   )returnc                 C  sR   d | jr| j dndd| jkr0d| j dn| j| jd k	rJd| j ndgS )Nr   @:[])joinr)   r*   r+   selfr   r   r   r%      s    zParseResult.authorityc                 C  s>   d d| jkrd| j dn| j| jd k	r6d| j ndgS )Nr   r2   r3   r4   )r5   r*   r+   r6   r   r   r   netloc   s
    zParseResult.netloc)kwargsr0   c                 K  s6   |s| S | j | j| j| j| jd}|| td|S )Nr#   r   )r   )r$   r%   r&   r'   r(   updateurlparse)r7   r9   defaultsr   r   r   	copy_with   s    
zParseResult.copy_withc                 C  sh   | j }d| jr| j dnd|r,d| nd| j| jd k	rHd| j nd| jd k	r`d| j ndgS )Nr   r2   //?#)r%   r5   r$   r&   r'   r(   )r7   r%   r   r   r   __str__   s    zParseResult.__str__N)	__name__
__module____qualname____annotations__propertyr%   r8   r=   rA   r   r   r   r   r,      s   
	r,   r-   r/   )urlr9   r0   c           "      K  s  t | tkrtdtdd | D r\tdd | D }| |}d|d| d}t|d|kr|d }t|tr~t|n||d< d	|kr|	d	pd
}|
d\|d< }|d< d|ksd|krt|	dd
pd
td}t|	dd
pd
td}	|	r
| d|	 n||d< d|krR|	dp*d
}
|

d\|d< }|d< |sRd |d< d|kr|dpjd
}d|kr|dr|dsd| d|d< | D ]\}}|d k	rt |tkrtd| dtdd |D r&tdd |D }||}d| d|d| d}t|t| |std| dqt| }|d k	sbt| }|d|d p~d
}|d |d  pd
}|d|d pd
}|d|d }|d!|d! }t|}|d k	st| }|d|d p d
}|d|d pd
}|d|d }| }t|td}t|}t||}|d
k}|d
kpp|d
kpp|d k	}t|||d" |s|rt|}t|td}|d krd n
t|t d} |d krd n
t|t!d}!t"|||||| |!S )#NzURL too longc                 s  s    | ]}|  o|  V  qd S Nisasciiisprintabler   charr   r   r   	<genexpr>   s     zurlparse.<locals>.<genexpr>c                 s  s"   | ]}|  r| s|V  qd S rH   rI   rL   r   r   r   rN      s       z.Invalid non-printable ASCII character in URL, z at position .r+   r8   r   r2   r*   usernamepasswordsafer)   raw_pathr?   r&   r'   r3   r4   zURL component 'z
' too longc                 s  s    | ]}|  o|  V  qd S rH   rI   rL   r   r   r   rN     s     c                 s  s"   | ]}|  r| s|V  qd S rH   rI   rL   r   r   r   rN     s      z-Invalid non-printable ASCII character in URL z component, zInvalid URL component ''r$   r%   r(   )
has_schemehas_authority)#lenMAX_URL_LENGTHr   anynextfind
isinstanceintr-   pop	partitionquoteUSERNAME_SAFEPASSWORD_SAFEget
startswithendswithitemsCOMPONENT_REGEX	fullmatch	URL_REGEXmatchAssertionError	groupdictAUTHORITY_REGEXlowerUSERINFO_SAFEencode_hostnormalize_portvalidate_pathnormalize_path	PATH_SAFE
QUERY_SAFE	FRAG_SAFEr,   )"rG   r9   rM   idxerrorr+   r8   _rP   rQ   rT   Z	seperatorr*   keyvalueZ	url_matchZurl_dictr$   r%   r&   r'   fragZauthority_matchZauthority_dictr)   Zparsed_schemeZparsed_userinfoZparsed_hostZparsed_portrV   rW   Zparsed_pathZparsed_queryZparsed_fragr   r   r   r;      s    


"




r;   )r*   r0   c                 C  s   | sdS t | rJzt|  W n$ tjk
rD   td| Y nX | S t| rzt| dd  W n$ tjk
r   td| Y nX | dd S |  rd}t	| 
 t| dS zt| 
 dW S  tjk
 r   td	| Y nX d S )
Nr   zInvalid IPv4 address: r   zInvalid IPv6 address: z"`{}%|\rR   asciizInvalid IDNA hostname: )IPv4_STYLE_HOSTNAMErk   	ipaddressIPv4AddressAddressValueErrorr   IPv6_STYLE_HOSTNAMEIPv6AddressrJ   ra   ro   
SUB_DELIMSidnaencodedecode	IDNAError)r*   ZWHATWG_SAFEr   r   r   rq   \  s*    

	rq   zstr | int | Noner.   )r+   r$   r0   c                 C  sj   | d ks| dkrd S zt | }W n" tk
rB   td| Y nX dddddd|}||krfd S |S )Nr   zInvalid port:    P   i  )ftphttphttpswswss)r^   
ValueErrorr   rd   )r+   r$   Zport_as_intdefault_portr   r   r   rr     s    
rr   boolNone)r&   rV   rW   r0   c                 C  sJ   |r| r|  dstd|sF|sF|  dr4td|  drFtddS )z
    Path validation rules that depend on if the URL contains
    a scheme or authority component.

    See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3
    /z7For absolute URLs, path must be empty or begin with '/'r>   z3Relative URLs cannot have a path starting with '//'r2   z2Relative URLs cannot have a path starting with ':'N)re   r   )r&   rV   rW   r   r   r   rs     s    

rs   )r&   r0   c                 C  sv   d| kr| S |  d}d|kr*d|kr*| S g }|D ]8}|dkr@q2|dkr`|rj|dgkrj|  q2|| q2d|S )z
    Drop "." and ".." segments from a URL path.

    For example:

        normalize_path("/path/./to/somewhere/..") == "/path/to"
    rO   r   z..r   )splitr_   appendr5   )r&   
componentsoutput	componentr   r   r   rt     s    	

rt   )stringr0   c                 C  s   d dd | dD S )Nr   c                 S  s   g | ]}d |dqS )%Z02Xr   )r   byter   r   r   r     s     zPERCENT.<locals>.<listcomp>zutf-8)r5   r   )r   r   r   r   PERCENT  s    r   )r   rS   r0   c                   s.   t |  |  s| S d fdd| D S )z1
    Use percent-encoding to quote a string.
    r   c                   s    g | ]}| kr|nt |qS r   )r   rL   ZNON_ESCAPED_CHARSr   r   r     s     z#percent_encoded.<locals>.<listcomp>)UNRESERVED_CHARACTERSrstripr5   )r   rS   r   r   r   percent_encoded  s    
r   c           
      C  s   g }d}t t| D ]T}| |  }}|d}||krZ| || }|t||d || |}q|t| kr| |d }	|t|	|d d	|S )a  
    Use percent-encoding to quote a string, omitting existing '%xx' escape sequences.

    See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1

    * `string`: The string to be percent-escaped.
    * `safe`: A string containing characters that may be treated as safe, and do not
        need to be escaped. Unreserved characters are always treated as safe.
        See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3
    r   rR   Nr   )
refinditerPERCENT_ENCODED_REGEXstartendgroupr   r   rX   r5   )
r   rS   partscurrent_positionrk   Zstart_positionZend_positionZmatched_textZleading_textZtrailing_textr   r   r   ra     s    

ra   )r   )&__doc__
__future__r   r   r   typingr   _exceptionsr   rY   r   r   compiler   r5   rangerw   rv   ru   rb   rc   rp   formatrj   rn   rh   r   r   
NamedTupler,   r;   rq   rr   rs   rt   r   r   ra   r   r   r   r   <module>   s   



7 /