release 2014.12.01

[tunein] Use station API
Credit @Tithen-Firion for the myspace changes (#4341)
2026-03-30 23:15:45 +00:00 · 2014-12-01 17:28:34 +01:00 · 2014-12-01 18:10:15 +02:00 · 2014-12-01 16:15:09 +01:00 · 2014-12-01 20:18:42 +06:00 · 2014-12-01 00:10:12 +01:00
15 changed files with 309 additions and 171 deletions
--- a/1
+++ b/1
@@ -88,3 +88,4 @@ Dao Hoang Son
 Oskar Jauch
 Matthew Rayfield
 t0mm0
+Tithen-Firion
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -238,7 +238,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+        self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -242,7 +242,7 @@ from .muenchentv import MuenchenTVIE
 from .musicplayon import MusicPlayOnIE
 from .musicvault import MusicVaultIE
 from .muzu import MuzuTVIE
-from .myspace import MySpaceIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .naver import NaverIE
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -1,9 +1,11 @@
 from __future__ import unicode_literals

 import re
+import xml.etree.ElementTree

 from .subtitles import SubtitlesInfoExtractor
 from ..utils import ExtractorError
+from ..compat import compat_HTTPError


 class BBCCoUkIE(SubtitlesInfoExtractor):
@@ -55,7 +57,22 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
-        }
+        },
+        {
+            'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
+            'info_dict': {
+                'id': 'b03k3pb7',
+                'ext': 'flv',
+                'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
+                'description': '2. Invasion',
+                'duration': 3600,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+        },
    ]

    def _extract_asx_playlist(self, connection, programme_id):
@@ -102,6 +119,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
        return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')

    def _extract_medias(self, media_selection):
+        error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
+        if error is not None:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
        return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')

    def _extract_connections(self, media):
@@ -158,54 +179,73 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
            subtitles[lang] = srt
        return subtitles

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        group_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, group_id, 'Downloading video page')
-        if re.search(r'id="emp-error" class="notinuk">', webpage):
-            raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
-                                 expected=True)
-
-        playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
-                                      'Downloading playlist XML')
-
-        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
-        if no_items is not None:
-            reason = no_items.get('reason')
-            if reason == 'preAvailability':
-                msg = 'Episode %s is not yet available' % group_id
-            elif reason == 'postAvailability':
-                msg = 'Episode %s is no longer available' % group_id
+    def _download_media_selector(self, programme_id):
+        try:
+            media_selection = self._download_xml(
+                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
+                programme_id, 'Downloading media selection XML')
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
            else:
-                msg = 'Episode %s is not available: %s' % (group_id, reason)
-            raise ExtractorError(msg, expected=True)
+                raise

        formats = []
        subtitles = None

-        for item in self._extract_items(playlist):
-            kind = item.get('kind')
-            if kind != 'programme' and kind != 'radioProgramme':
-                continue
-            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
-            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+        for media in self._extract_medias(media_selection):
+            kind = media.get('kind')
+            if kind == 'audio':
+                formats.extend(self._extract_audio(media, programme_id))
+            elif kind == 'video':
+                formats.extend(self._extract_video(media, programme_id))
+            elif kind == 'captions':
+                subtitles = self._extract_captions(media, programme_id)

-            programme_id = item.get('identifier')
-            duration = int(item.get('duration'))
+        return formats, subtitles

-            media_selection = self._download_xml(
-                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
-                programme_id, 'Downloading media selection XML')
+    def _real_extract(self, url):
+        group_id = self._match_id(url)

-            for media in self._extract_medias(media_selection):
-                kind = media.get('kind')
-                if kind == 'audio':
-                    formats.extend(self._extract_audio(media, programme_id))
-                elif kind == 'video':
-                    formats.extend(self._extract_video(media, programme_id))
-                elif kind == 'captions':
-                    subtitles = self._extract_captions(media, programme_id)
+        webpage = self._download_webpage(url, group_id, 'Downloading video page')
+
+        programme_id = self._search_regex(
+            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
+        if programme_id:
+            player = self._download_json(
+                'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
+                group_id)['jsConf']['player']
+            title = player['title']
+            description = player['subtitle']
+            duration = player['duration']
+            formats, subtitles = self._download_media_selector(programme_id)
+        else:
+            playlist = self._download_xml(
+                'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
+                group_id, 'Downloading playlist XML')
+
+            no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+            if no_items is not None:
+                reason = no_items.get('reason')
+                if reason == 'preAvailability':
+                    msg = 'Episode %s is not yet available' % group_id
+                elif reason == 'postAvailability':
+                    msg = 'Episode %s is no longer available' % group_id
+                elif reason == 'noMedia':
+                    msg = 'Episode %s is not currently available' % group_id
+                else:
+                    msg = 'Episode %s is not available: %s' % (group_id, reason)
+                raise ExtractorError(msg, expected=True)
+
+            for item in self._extract_items(playlist):
+                kind = item.get('kind')
+                if kind != 'programme' and kind != 'radioProgramme':
+                    continue
+                title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+                description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+                programme_id = item.get('identifier')
+                duration = int(item.get('duration'))
+                formats, subtitles = self._download_media_selector(programme_id)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(programme_id, subtitles)
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -64,6 +64,20 @@ class BlipTVIE(SubtitlesInfoExtractor):
                'uploader': 'redvsblue',
                'uploader_id': '792887',
            }
+        },
+        {
+            'url': 'http://blip.tv/play/gbk766dkj4Yn',
+            'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
+            'info_dict': {
+                'id': '1749452',
+                'ext': 'mp4',
+                'upload_date': '20090208',
+                'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
+                'title': 'Nostalgia Critic: Transformers',
+                'timestamp': 1234068723,
+                'uploader': 'NostalgiaCritic',
+                'uploader_id': '246467',
+            }
        }
    ]

@@ -74,11 +88,13 @@ class BlipTVIE(SubtitlesInfoExtractor):
        # See https://github.com/rg3/youtube-dl/issues/857 and
        # https://github.com/rg3/youtube-dl/issues/4197
        if lookup_id:
-            info_page = self._download_webpage(
-                'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
-            video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
-        else:
-            video_id = mobj.group('id')
+            urlh = self._request_webpage(
+                'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
+            url = compat_urlparse.urlparse(urlh.geturl())
+            qs = compat_urlparse.parse_qs(url.query)
+            mobj = re.match(self._VALID_URL, qs['file'][0])
+
+        video_id = mobj.group('id')

        rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')

@@ -114,7 +130,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
            msg = self._download_webpage(
                url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
                video_id, 'Resolving URL for %s' % role)
-            real_url = compat_urlparse.parse_qs(msg)['message'][0]
+            real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]

            media_type = media_content.get('type')
            if media_type == 'text/srt' or url.endswith('.srt'):
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -13,6 +13,7 @@ import time
 import xml.etree.ElementTree

 from ..compat import (
+    compat_cookiejar,
    compat_http_client,
    compat_urllib_error,
    compat_urllib_parse_urlparse,
@@ -817,6 +818,11 @@ class InfoExtractor(object):
                self._downloader.report_warning(msg)
        return res

+    def _set_cookie(self, domain, name, value, expire_time=None):
+        cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
+            None, '/', True, False, expire_time, '', None, None, None)
+        self._downloader.cookiejar.set_cookie(cookie)
+

 class SearchInfoExtractor(InfoExtractor):
    """
--- a/youtube_dl/extractor/myspace.py
+++ b/youtube_dl/extractor/myspace.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
 from __future__ import unicode_literals

 import re
@@ -7,6 +8,7 @@ from .common import InfoExtractor
 from ..compat import (
    compat_str,
 )
+from ..utils import ExtractorError


 class MySpaceIE(InfoExtractor):
@@ -14,33 +16,58 @@ class MySpaceIE(InfoExtractor):

    _TESTS = [
        {
-            'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
+            'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
            'info_dict': {
-                'id': '100008689',
+                'id': '109594919',
                'ext': 'flv',
-                'title': 'Viva La Vida',
-                'description': 'The official Viva La Vida video, directed by Hype Williams',
-                'uploader': 'Coldplay',
-                'uploader_id': 'coldplay',
+                'title': 'Little Big Town',
+                'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
+                'uploader': 'Five Minutes to the Stage',
+                'uploader_id': 'fiveminutestothestage',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
-        # song
+        # songs
        {
-            'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
+            'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
            'info_dict': {
-                'id': '39008454',
+                'id': '93388656',
                'ext': 'flv',
-                'title': 'Darkness In My Heart',
-                'uploader_id': 'spiderbags',
+                'title': 'Of weakened soul...',
+                'uploader': 'Killsorrow',
+                'uploader_id': 'killsorrow',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
+        }, {
+            'add_ie': ['Vevo'],
+            'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
+            'info_dict': {
+                'id': 'USZM20600099',
+                'ext': 'mp4',
+                'title': 'Animal I Have Become',
+                'uploader': 'Three Days Grace',
+                'timestamp': int,
+                'upload_date': '20060502',
+            },
+            'skip': 'VEVO is only available in some countries',
+        }, {
+            'add_ie': ['Youtube'],
+            'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
+            'info_dict': {
+                'id': 'ypWvQgnJrSU',
+                'ext': 'mp4',
+                'title': 'Starset - First Light',
+                'description': 'md5:2d5db6c9d11d527683bcda818d332414',
+                'uploader': 'Jacob Soren',
+                'uploader_id': 'SorenPromotions',
+                'upload_date': '20140725',
+            }
        },
    ]

@@ -48,16 +75,40 @@ class MySpaceIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
+        player_url = self._search_regex(
+            r'playerSwf":"([^"?]*)', webpage, 'player URL')

        if mobj.group('mediatype').startswith('music/song'):
            # songs don't store any useful info in the 'context' variable
+            song_data = self._search_regex(
+                r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
+                webpage, 'song_data', default=None, group=0)
+            if song_data is None:
+                # some songs in an album are not playable
+                self.report_warning(
+                    '%s: No downloadable song on this page' % video_id)
+                return
            def search_data(name):
                return self._search_regex(
-                    r'data-%s="(.*?)"' % name, webpage, name)
+                    r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
+                    song_data, name, default='', group='data')
            streamUrl = search_data('stream-url')
+            if not streamUrl:
+                vevo_id = search_data('vevo-id')
+                youtube_id = search_data('youtube-id')
+                if vevo_id:
+                    self.to_screen('Vevo video detected: %s' % vevo_id)
+                    return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+                elif youtube_id:
+                    self.to_screen('Youtube video detected: %s' % youtube_id)
+                    return self.url_result(youtube_id, ie='Youtube')
+                else:
+                    raise ExtractorError(
+                        'Found song but don\'t know how to download it')
            info = {
                'id': video_id,
                'title': self._og_search_title(webpage),
+                'uploader': search_data('artist-name'),
                'uploader_id': search_data('artist-username'),
                'thumbnail': self._og_search_thumbnail(webpage),
            }
@@ -79,6 +130,50 @@ class MySpaceIE(InfoExtractor):
        info.update({
            'url': rtmp_url,
            'play_path': play_path,
+            'player_url': player_url,
            'ext': 'flv',
        })
        return info
+
+
+class MySpaceAlbumIE(InfoExtractor):
+    IE_NAME = 'MySpace:album'
+    _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
+        'info_dict': {
+            'title': 'Transmissions',
+            'id': '19455773',
+        },
+        'playlist_count': 14,
+        'skip': 'this album is only available in some countries',
+    }, {
+        'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
+        'info_dict': {
+            'title': 'The Demo',
+            'id': '18596029',
+        },
+        'playlist_count': 5,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        display_id = mobj.group('title') + playlist_id
+        webpage = self._download_webpage(url, display_id)
+        tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
+        if not tracks_paths:
+            raise ExtractorError(
+                '%s: No songs found, try using proxy' % display_id,
+                expected=True)
+        entries = [
+            self.url_result(t_path, ie=MySpaceIE.ie_key())
+            for t_path in tracks_paths]
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'display_id': display_id,
+            'title': self._og_search_title(webpage),
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -20,6 +20,7 @@ class NocoIE(InfoExtractor):
    _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
    _LOGIN_URL = 'http://noco.tv/do.php'
    _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
+    _SUB_LANG_TEMPLATE = '&sub_lang=%s'
    _NETRC_MACHINE = 'noco'

    _TEST = {
@@ -60,10 +61,12 @@ class NocoIE(InfoExtractor):
        if 'erreur' in login:
            raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)

-    def _call_api(self, path, video_id, note):
+    def _call_api(self, path, video_id, note, sub_lang=None):
        ts = compat_str(int(time.time() * 1000))
        tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
        url = self._API_URL_TEMPLATE % (path, ts, tk)
+        if sub_lang:
+            url += self._SUB_LANG_TEMPLATE % sub_lang

        resp = self._download_json(url, video_id, note)

@@ -91,31 +94,34 @@ class NocoIE(InfoExtractor):

        formats = []

-        for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
+        for lang, lang_dict in medias['fr']['video_list'].items():
+            for format_id, fmt in lang_dict['quality_list'].items():
+                format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id

-            video = self._call_api(
-                'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
-                video_id, 'Downloading %s video JSON' % format_id)
+                video = self._call_api(
+                    'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
+                    video_id, 'Downloading %s video JSON' % format_id_extended,
+                    lang if lang != 'none' else None)

-            file_url = video['file']
-            if not file_url:
-                continue
+                file_url = video['file']
+                if not file_url:
+                    continue

-            if file_url in ['forbidden', 'not found']:
-                popmessage = video['popmessage']
-                self._raise_error(popmessage['title'], popmessage['message'])
+                if file_url in ['forbidden', 'not found']:
+                    popmessage = video['popmessage']
+                    self._raise_error(popmessage['title'], popmessage['message'])

-            formats.append({
-                'url': file_url,
-                'format_id': format_id,
-                'width': fmt['res_width'],
-                'height': fmt['res_lines'],
-                'abr': fmt['audiobitrate'],
-                'vbr': fmt['videobitrate'],
-                'filesize': fmt['filesize'],
-                'format_note': qualities[format_id]['quality_name'],
-                'preference': qualities[format_id]['priority'],
-            })
+                formats.append({
+                    'url': file_url,
+                    'format_id': format_id_extended,
+                    'width': fmt['res_width'],
+                    'height': fmt['res_lines'],
+                    'abr': fmt['audiobitrate'],
+                    'vbr': fmt['videobitrate'],
+                    'filesize': fmt['filesize'],
+                    'format_note': qualities[format_id]['quality_name'],
+                    'preference': qualities[format_id]['priority'],
+                })

        self._sort_formats(formats)

--- a/youtube_dl/extractor/playvid.py
+++ b/youtube_dl/extractor/playvid.py
@@ -4,6 +4,8 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
+    clean_html,
    compat_urllib_parse,
 )

@@ -28,6 +30,11 @@ class PlayvidIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

+        m_error = re.search(
+            r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
+        if m_error:
+            raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
+
        video_title = None
        duration = None
        video_thumbnail = None
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@@ -39,7 +39,7 @@ class SlideshareIE(InfoExtractor):
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
        description = self._html_search_regex(
-            r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
+            r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
            'description', fatal=False)

        return {
--- a/youtube_dl/extractor/tunein.py
+++ b/youtube_dl/extractor/tunein.py
@@ -19,6 +19,7 @@ class TuneInIE(InfoExtractor):
        |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
    )
    '''
+    _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'

    _INFO_DICT = {
        'id': '34682',
@@ -56,13 +57,10 @@ class TuneInIE(InfoExtractor):
            mobj = re.match(self._VALID_URL, url)
        station_id = mobj.group('id')

-        webpage = self._download_webpage(
-            url, station_id, note='Downloading station webpage')
+        station_info = self._download_json(
+            self._API_URL_TEMPLATE.format(station_id),
+            station_id, note='Downloading station JSON')

-        payload = self._html_search_regex(
-            r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
-        json_data = json.loads(payload)
-        station_info = json_data['Station']['broadcast']
        title = station_info['Title']
        thumbnail = station_info.get('Logo')
        location = station_info.get('Location')
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -13,7 +13,7 @@ from ..utils import (
 class VevoIE(InfoExtractor):
    """
    Accepts urls from vevo.com or in the format 'vevo:{id}'
-    (currently used by MTVIE)
+    (currently used by MTVIE and MySpaceIE)
    """
    _VALID_URL = r'''(?x)
        (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
--- a/youtube_dl/extractor/xminus.py
+++ b/youtube_dl/extractor/xminus.py
@@ -50,7 +50,7 @@ class XMinusIE(InfoExtractor):
            webpage, 'view count', fatal=False))

        enc_token = self._html_search_regex(
-            r'data-mt="(.*?)"', webpage, 'enc_token')
+            r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
        token = ''.join(
            c if pos == 3 else compat_chr(compat_ord(c) - 1)
            for pos, c in enumerate(reversed(enc_token)))
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -7,6 +7,7 @@ import itertools
 import json
 import os.path
 import re
+import time
 import traceback

 from .common import InfoExtractor, SearchInfoExtractor
@@ -38,17 +39,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
-    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
-    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
-        return bool(self._download_webpage(
-            self._LANG_URL, None,
-            note='Setting language', errnote='unable to set language',
-            fatal=False))
+        self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
+            # YouTube sets the expire time to about two months
+            expire_time=time.time() + 60*24*3600)

    def _login(self):
        """
@@ -176,30 +174,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            return False
        return True

-    def _confirm_age(self):
-        age_form = {
-            'next_url': '/',
-            'action_confirm': 'Confirm',
-        }
-        req = compat_urllib_request.Request(
-            self._AGE_URL,
-            compat_urllib_parse.urlencode(age_form).encode('ascii')
-        )
-
-        self._download_webpage(
-            req, None,
-            note='Confirming age', errnote='Unable to confirm age',
-            fatal=False)
-
    def _real_initialize(self):
        if self._downloader is None:
            return
-        if self._get_login_info()[0] is not None:
-            if not self._set_language():
-                return
+        self._set_language()
        if not self._login():
            return
-        self._confirm_age()


 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -305,6 +285,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},

        # Dash webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -398,8 +379,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
-                'title': 'Afrojack - The Spark ft. Spree Wilson',
-                'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
+                'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
+                'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
@@ -421,7 +402,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            }
-        }
+        },
+        # Normal age-gate video (No vevo, embed allowed)
+        {
+            'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
+            'info_dict': {
+                'id': 'HtVdAasjOgU',
+                'ext': 'mp4',
+                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
+                'description': 'md5:eca57043abae25130f58f655ad9a7771',
+                'uploader': 'The Witcher',
+                'uploader_id': 'WitcherGame',
+                'upload_date': '20140605',
+            },
+        },
    ]

    def __init__(self, *args, **kwargs):
@@ -684,16 +678,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

        # Get video webpage
        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
-        pref_cookies = [
-            c for c in self._downloader.cookiejar
-            if c.domain == '.youtube.com' and c.name == 'PREF']
-        for pc in pref_cookies:
-            if 'hl=' in pc.value:
-                pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
-            else:
-                if pc.value:
-                    pc.value += '&'
-                pc.value += 'hl=en'
        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL
@@ -704,7 +688,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            player_url = None

        # Get video info
-        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -723,15 +706,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            video_info = compat_parse_qs(video_info_webpage)
        else:
            age_gate = False
-            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
-                                  % (video_id, el_type))
-                video_info_webpage = self._download_webpage(video_info_url, video_id,
-                                                            note=False,
-                                                            errnote='unable to download video info webpage')
-                video_info = compat_parse_qs(video_info_webpage)
-                if 'token' in video_info:
-                    break
+            try:
+                # Try looking directly into the video webpage
+                mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
+                if not mobj:
+                    raise ValueError('Could not find ytplayer.config')  # caught below
+                json_code = uppercase_escape(mobj.group(1))
+                ytplayer_config = json.loads(json_code)
+                args = ytplayer_config['args']
+                # Convert to the same format returned by compat_parse_qs
+                video_info = dict((k, [v]) for k, v in args.items())
+                if 'url_encoded_fmt_stream_map' not in args:
+                    raise ValueError('No stream_map present')  # caught below
+            except ValueError:
+                # We fallback to the get_video_info pages (used by the embed page)
+                self.report_video_info_webpage_download(video_id)
+                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
+                    video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                        % (video_id, el_type))
+                    video_info_webpage = self._download_webpage(video_info_url,
+                        video_id, note=False,
+                        errnote='unable to download video info webpage')
+                    video_info = compat_parse_qs(video_info_webpage)
+                    if 'token' in video_info:
+                        break
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(
@@ -856,32 +854,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)

-        # Decide which formats to download
-        try:
-            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
-            if not mobj:
-                raise ValueError('Could not find vevo ID')
-            json_code = uppercase_escape(mobj.group(1))
-            ytplayer_config = json.loads(json_code)
-            args = ytplayer_config['args']
-            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
-            # this signatures are encrypted
-            if 'url_encoded_fmt_stream_map' not in args:
-                raise ValueError('No stream_map present')  # caught below
-            re_signature = re.compile(r'[&,]s=')
-            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
-            if m_s is not None:
-                self.to_screen('%s: Encrypted signatures detected.' % video_id)
-                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
-            m_s = re_signature.search(args.get('adaptive_fmts', ''))
-            if m_s is not None:
-                if 'adaptive_fmts' in video_info:
-                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
-                else:
-                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]
-        except ValueError:
-            pass
-
        def _map_to_format_list(urlmap):
            formats = []
            for itag, video_real_url in urlmap.items():
@@ -974,10 +946,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
-                if age_gate:
-                    dash_manifest_url = video_info.get('dashmpd')[0]
-                else:
-                    dash_manifest_url = ytplayer_config['args']['dashmpd']
+                dash_manifest_url = video_info.get('dashmpd')[0]

                def decrypt_sig(mobj):
                    s = mobj.group(1)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2014.11.27'
+__version__ = '2014.12.01'
Author	SHA1	Message	Date
Philipp Hagemeister	df015c69ea	release 2014.12.01	2014-12-01 17:28:34 +01:00
Naglis Jonaitis	1434bffa1f	[tunein] Use station API	2014-12-01 18:10:15 +02:00
Jaime Marquínez Ferrándiz	94aa25b995	Credit @Tithen-Firion for the myspace changes (#4341 )	2014-12-01 16:15:09 +01:00
Sergey M․	d128cfe393	[slideshare] Fix description extraction	2014-12-01 20:18:42 +06:00
Jaime Marquínez Ferrándiz	954f36f890	[myspace] Cleanup	2014-12-01 00:10:12 +01:00
Jaime Marquínez Ferrándiz	19e92770c9	[myspace] Replace removed test video and fix the others	2014-12-01 00:10:12 +01:00
Tithen-Firion	95c673a148	[myspace] Add extractor for albums	2014-12-01 00:10:12 +01:00
Tithen-Firion	a196a53265	[myspace] Update tests	2014-12-01 00:10:12 +01:00
Tithen-Firion	3266f0c68e	[myspace] Redirect to other extractors There are many songs just linked from Vevo/YouTube to MySpace. Vevo example: https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041 YouTube example: https://myspace.com/starset2/music/song/first-light-95799905-106964426	2014-12-01 00:10:12 +01:00
Tithen-Firion	1940fadd53	[myspace] Handle non-playable songs I'm adding this because sometimes there is a song page, but you cannot play it. Example: https://myspace.com/starset2/music/song/let-it-die-maniac-agenda-remix-bonus-track-95799916-106964439 It will be useful for downloading whole album with songs like this.	2014-12-01 00:10:11 +01:00
Tithen-Firion	03fd72d996	[myspace] Add more data to info dict `uploader` is an artist `playlist` is an album	2014-12-01 00:10:11 +01:00
Tithen-Firion	f2b44a2513	[myspace] Use player_url for faster download It keeps reconnecting without it. Download time decreased from 7+ minutes to 25 seconds for me.	2014-12-01 00:10:11 +01:00
Jaime Marquínez Ferrándiz	c522adb1f0	[youtube] Add a normal age-gate test video	2014-11-30 21:45:49 +01:00
Jaime Marquínez Ferrándiz	7160532d41	[youtube] Simplify code for getting the dash manifest url video_info contains now the 'ytplayer.config.args' dictionary	2014-11-30 21:07:50 +01:00
Jaime Marquínez Ferrándiz	4e62ebe250	[youtube] Try to extract the video_info from the webpage before requesting the 'get_video_info' pages The YouTube player doesn't seem to use them except for embedded videos, so we can skip a network request. But they still provide better error messages (for removed videos for example).	2014-11-30 20:56:32 +01:00
Jaime Marquínez Ferrándiz	4472f84f0c	[test/test_subtitles] Update checksum for vimeo subtitle file	2014-11-30 19:42:54 +01:00
Jaime Marquínez Ferrándiz	b766eb2707	[youtube] Update test	2014-11-30 19:18:39 +01:00
Jaime Marquínez Ferrándiz	10a404c335	[youtube] Add format 313 (fixes #4339 )	2014-11-30 18:56:14 +01:00
Sergey M․	c056efa2e3	[bbccouk] Fix extraction (#4104 , #4214 )	2014-11-30 22:37:56 +06:00
Philipp Hagemeister	283ac8d592	Merge pull request #4338 from t0mm0/x-minus-fix [xminus] update tkn extraction regex	2014-11-30 17:11:05 +01:00
t0mm0	313d4572ce	[xminus] update tkn extraction regex	2014-11-30 16:04:04 +00:00
Jaime Marquínez Ferrándiz	42939b6129	[youtube] Use a cookie for setting the language This way, we don't have to do an additional request	2014-11-30 00:03:59 +01:00
Jaime Marquínez Ferrándiz	37ea8164d3	[youtube] Don't confirm age when initializing It seems that all the videos with age restriction use now the age gate method, which doesn't require any confirmation.	2014-11-29 23:46:39 +01:00
Jaime Marquínez Ferrándiz	8c810a7db3	Merge pull request #4333 from ymln/bliptv-fixes [bliptv] Fix some videos not downloading	2014-11-29 20:20:45 +01:00
Yuriy Melnyk	248a0b890f	[bliptv] Fix \n\n at the end of real_url See https://github.com/rg3/youtube-dl/issues/3544#issuecomment-53166516	2014-11-29 19:17:56 +02:00
Yuriy Melnyk	96b7c7fe3f	[bliptv] Fix resolution of lookup id in some videos In some videos (for example, http://blip.tv/play/gbk766dkj4Yn) resolving lookup id would fail, because page at http://blip.tv/play/gbk766dkj4Yn.x?p=1 would have no "config.id" in it. Fixed by requesting different URL and inspecting the URL which the client is redirected to.	2014-11-29 19:17:56 +02:00
Sergey M․	e987e91fcc	[playvid] Capture and output error message	2014-11-29 22:16:35 +06:00
Sergey M․	cb6444e197	[noco] Add support for multi language videos (Closes #4326 )	2014-11-28 20:38:47 +06:00