U
    gE                     @   s  d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZ ddlmZmZmZmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZmZ ee Z!e"dddZ#dd Z$dd Z%e"e&dddZ'e"e"e"fdddZ(e"e"dddZ)e"e"dddZ*e"e"dddZ+e"e"e"dd d!Z,e"e"e"d"d#d$Z-ee"d%d&d'Z.e"e"d(d)d*Z/e"ee"ee" f d+d,d-Z0e"e	d(d.d/Z1e"e	d(d0d1Z2e"e"d(d2d3Z3e
e
e"dd4d5d6Z4e
dd7d8d9Z5e"e"dd:d;Z6e"e"dd<d=Z7ee d>d?d@Z8dS )AzBThis module contains all non-cipher related data extraction logic.    N)OrderedDict)datetime)AnyDictListOptionalTuple)parse_qsquote	urlencodeurlparse)Cipher)HTMLParseErrorLiveStreamErrorRegexMatchErrorregex_search)YouTubeMetadata)parse_for_objectparse_for_all_objects
watch_htmlc                 C   s6   zt d| dd}W n tk
r(   Y dS X t|dS )zExtract publish date
    :param str watch_html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Publish date of the video.
    z;(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}r   groupNz%Y-%m-%d)r   r   r   strptime)r   result r   2/tmp/pip-unpacked-wheel-1a9f0fi6/pytube/extract.pypublish_date   s     
r   c                 C   s"   dg}|D ]}|| kr
 dS q
dS )zCheck if live stream recording is available.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is private.
    z,This live stream recording is not available.FTr   )r   Zunavailable_stringsstringr   r   r   recording_available&   s    
r    c                 C   s&   dddg}|D ]}|| kr dS qdS )zCheck if content is private.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is private.
    zFThis is a private video. Please sign in to verify that you may see it.z"simpleText":"Private video"zThis video is private.TFr   )r   Zprivate_stringsr   r   r   r   
is_private8   s    
r!   )r   returnc                 C   s.   zt d| dd W n tk
r(   Y dS X dS )zCheck if content is age restricted.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is age restricted.
    zog:restrictions:ager   r   FT)r   r   r   r   r   r   is_age_restrictedL   s
    	r#   c                 C   sd   t | }|di }d|kr dS d|krZd|krB|d |d gfS d|krZ|d |d fS ddgfS )a  Return the playability status and status explanation of a video.

    For example, a video may have a status of LOGIN_REQUIRED, and an explanation
    of "This is a private video. Please sign in to verify that you may see it."

    This explanation is what gets incorporated into the media player overlay.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Playability status and reason of the video.
    playabilityStatusliveStreamability)ZLIVE_STREAMzVideo is a live stream.statusreasonmessagesN)initial_player_responseget)r   Zplayer_responseZstatus_dictr   r   r   playability_status\   s    r+   )urlr"   c                 C   s   t d| ddS )ar  Extract the ``video_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/watch?v={video_id}`
    - :samp:`https://youtube.com/embed/{video_id}`
    - :samp:`https://youtu.be/{video_id}`

    :param str url:
        A YouTube url containing a video id.
    :rtype: str
    :returns:
        YouTube video id.
    z(?:v=|\/)([0-9A-Za-z_-]{11}).*   r   r   )r,   r   r   r   video_idv   s    r.   c                 C   s   t j| }t|jd d S )ao  Extract the ``playlist_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/playlist?list={playlist_id}`
    - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}`

    :param str url:
        A YouTube url containing a playlist id.
    :rtype: str
    :returns:
        YouTube playlist id.
    listr   )urllibparser   r	   query)r,   parsedr   r   r   playlist_id   s    r4   c                 C   sr   ddddg}|D ]P}t |}|| }|rtd| |d}|d}d| d|   S qtd	d
ddS )a  Extract the ``channel_name`` or ``channel_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/c/{channel_name}/*`
    - :samp:`https://youtube.com/channel/{channel_id}/*
    - :samp:`https://youtube.com/u/{channel_name}/*`
    - :samp:`https://youtube.com/user/{channel_id}/*

    :param str url:
        A YouTube url containing a channel name.
    :rtype: str
    :returns:
        YouTube channel name.
    z(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)z%(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)z(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)z"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)"finished regex search, matched: %sr-      /channel_namepatternsZcallerpatternNrecompilesearchloggerdebugr   r   )r,   r9   r;   regexfunction_matchZ	uri_styleZuri_identifierr   r   r   r8      s"    



 r8   )r.   	watch_urlr"   c                 C   s*   t d| fddt|fddddg}t|S )a  Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str watch_url:
        A YouTube watch url.
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    r.   )Zpsdefaulteurl)Zhlen_UShtml51cZTVHTML5Zcverz
7.20201028)r   r
   _video_info_url)r.   rD   paramsr   r   r   video_info_url   s    
rP   )r.   
embed_htmlr"   c                 C   s^   zt d|dd}W n tk
r*   d}Y nX d|  }td| fd|fd|fd	d
dg}t|S )a<  Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str embed_html:
        The html contents of the embed page (for age restricted videos).
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    z"sts"\s*:\s*(\d+)r-   r    z!https://youtube.googleapis.com/v/r.   rF   stsrH   rK   rM   )r   r   r   rN   )r.   rQ   rS   rF   rO   r   r   r   video_info_url_age_restricted   s    


rT   )rO   r"   c                 C   s   dt |  S )Nz'https://www.youtube.com/get_video_info?)r   )rO   r   r   r   rN      s    rN   )htmlr"   c              	   C   s>   zt | d d }W n  ttfk
r4   t| }Y nX d| S )zGet the base JavaScript url.

    Construct the base JavaScript url, which contains the decipher
    "transforms".

    :param str html:
        The html contents of the watch page.
    Zassetsjszhttps://youtube.com)get_ytplayer_configKeyErrorr   get_ytplayer_js)rU   Zbase_jsr   r   r   js_url   s
    	rZ   )mime_type_codecr"   c                 C   sL   d}t |}|| }|s(td|d| \}}|dd |dD fS )a  Parse the type data.

    Breaks up the data in the ``type`` key of the manifest, which contains the
    mime type and codecs serialized together, and splits them into separate
    elements.

    **Example**:

    mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus'])

    :param str mime_type_codec:
        String containing mime type and codecs.
    :rtype: tuple
    :returns:
        The mime type and a list of codecs.

    z,(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\"r[   r:   c                 S   s   g | ]}|  qS r   )strip).0rL   r   r   r   
<listcomp>$  s     z#mime_type_codec.<locals>.<listcomp>,)r=   r>   r?   r   groupssplit)r[   r;   rB   resultsZ	mime_typecodecsr   r   r   r[     s    

r[   c                 C   sV   dg}|D ]:}t |}|| }|r
td| |d}|  S q
tddddS )zGet the YouTube player base JavaScript path.

    :param str html
        The html contents of the watch page.
    :rtype: str
    :returns:
        Path to YouTube's base.js file.
    z'(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)r5   r-   rY   js_url_patternsr:   Nr<   )rU   rd   r;   rB   rC   Zyt_player_jsr   r   r   rY   '  s    




 rY   c                 C   s   t d ddg}|D ]Z}zt| |W   S  tk
rn } z&t d|  t | W Y qW 5 d}~X Y qX qdg}|D ].}zt| |W   S  tk
r   Y q|Y q|X q|tddd	dS )
a  Get the YouTube player configuration data from the watch html.

    Extract the ``ytplayer_config``, which is json data embedded within the
    watch html and serves as the primary source of obtaining the stream
    manifest data.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    zfinding initial function namezytplayer\.config\s*=\s*ytInitialPlayerResponse\s*=\s*zPattern failed: Nz,yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*rW   z#config_patterns, setconfig_patternsr:   )r@   rA   r   r   r   )rU   Zconfig_patternsr;   eZsetconfig_patternsr   r   r   rW   @  s,    

 rW   c              	   C   sr   i }ddg}|D ]@}z"t | |}|D ]}|| q$W q tk
rN   Y qY qX qt|dkrb|S tddddS )a;  Get the entirety of the ytcfg object.

    This is built over multiple pieces, so we have to find all matches and
    combine the dicts together.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    z
ytcfg\s=\szytcfg\.set\(r   	get_ytcfgZytcfg_pattenrsr:   N)r   updater   lenr   )rU   ZytcfgZytcfg_patternsr;   Zfound_objectsobjr   r   r   rg   n  s"    
 rg   )stream_manifestvid_inforV   r"   c              	   C   s8  t |d}t| D ]\}}z|d }W n2 tk
rZ   |di d}|rVtdY nX d|ks|d|krd|ks|d	|krtd
 q|j|d d}td|d  t|}	t	t|j
}
dd |
 D }
||
d< d|
 krt|
d }||}||
d< |	j d|	j |	j dt|
 }|| | d< qdS )zApply the decrypted signature to the stream manifest.

    :param dict stream_manifest:
        Details of the media streams available.
    :param str js:
        The contents of the base.js asset file.

    )rV   r,   r$   r%   UNKNOWN	signaturesz&sig=z&lsig=zsignature found, skip decipher)Zciphered_signaturez+finished descrambling signature for itag=%sZitagc                 S   s   i | ]\}}||d  qS )r   r   )r]   kvr   r   r   
<dictcomp>  s     z#apply_signature.<locals>.<dictcomp>sigZ
ratebypassnz://?N)r   	enumeraterX   r*   r   r@   rA   Zget_signaturer   r	   r2   itemskeysr/   Zcalculate_nschemenetlocpathr   )rk   rl   rV   cipheristreamr,   Zlive_streamrn   
parsed_urlZquery_paramsZ	initial_nZnew_nr   r   r   apply_signature  sJ    	

 
"r   )stream_datar"   c                 C   s   d| krdS g }d|   kr*|| d  d|   krD|| d  |D ]R}d|krd|krt|d }|d d |d< |d d |d< |dd	k|d
< qHtd |S )a-  Apply various in-place transforms to YouTube's media stream data.

    Creates a ``list`` of dictionaries by string splitting on commas, then
    taking each list item, parsing it as a query string, converting it to a
    ``dict`` and unquoting the value.

    :param dict stream_data:
        Dictionary containing query string encoded values.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    r,   NformatsZadaptiveFormatsZsignatureCipherr   ro   typeZFORMAT_STREAM_TYPE_OTFZis_otfzapplying descrambler)rx   extendr	   r*   r@   rA   )r   r   dataZ
cipher_urlr   r   r   apply_descrambler  s     
r   c              	   C   sH   ddg}|D ]*}zt | |W   S  tk
r4   Y qX qtddddS )zExtract the ytInitialData json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    z'window\[['\"]ytInitialData['\"]]\s*=\s*zytInitialData\s*=\s*initial_dataZinitial_data_patternr:   Nr   r   r   r   r9   r;   r   r   r   r     s    
r   c              	   C   sH   ddg}|D ]*}zt | |W   S  tk
r4   Y qX qtddddS )a  Extract the ytInitialPlayerResponse json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    z1window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*re   r)   Zinitial_player_response_patternr:   Nr   r   r   r   r   r)     s    
r)   )r"   c              	   C   sv   z0| d d d d d d d d d d }W n  t tfk
rP   tg  Y S X td	d
 |}dd |D }t|S )u<  Get the informational metadata for the video.

    e.g.:
    [
        {
            'Song': '강남스타일(Gangnam Style)',
            'Artist': 'PSY',
            'Album': 'PSY SIX RULES Pt.1',
            'Licensed to YouTube by': 'YG Entertainment Inc. [...]'
        }
    ]

    :rtype: YouTubeMetadata
    contentsZtwoColumnWatchNextResultsrb   r-   ZvideoSecondaryInfoRendererZmetadataRowContainerZmetadataRowContainerRendererZrowsc                 S   s   d|   kS )NmetadataRowRenderer)rx   )xr   r   r   <lambda>;      zmetadata.<locals>.<lambda>c                 S   s   g | ]}|d  qS )r   r   )r]   r   r   r   r   r^   A  s     zmetadata.<locals>.<listcomp>)rX   
IndexErrorr   filter)r   Zmetadata_rowsr   r   r   metadata!  s4    
r   )9__doc__loggingurllib.parser0   r=   collectionsr   r   typingr   r   r   r   r   r	   r
   r   r   Zpytube.cipherr   Zpytube.exceptionsr   r   r   Zpytube.helpersr   Zpytube.metadatar   Zpytube.parserr   r   	getLogger__name__r@   strr   r    r!   boolr#   r+   r.   r4   r8   rP   rT   rN   rZ   r[   rY   rW   rg   r   r   r   r)   r   r   r   r   r   <module>   sD   
$ ."9)