release 2014.04.07.3

[utils] Completely rewrite Windows output (Fixes #2672)
release 2014.04.07.2
2026-04-23 19:13:16 +00:00 · 2014-04-07 22:48:45 +02:00 · 2014-04-07 22:48:13 +02:00 · 2014-04-07 21:41:20 +02:00 · 2014-04-07 21:40:34 +02:00 · 2014-04-07 19:57:51 +02:00
22 changed files with 484 additions and 111 deletions
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -324,7 +324,6 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['id'], '342759')
        self.assertEqual(
            result['title'], 'General Motors Ignition Switch Recall')
        self.assertEqual(len(result['entries']), 9)
        whole_duration = sum(e['duration'] for e in result['entries'])
        self.assertEqual(whole_duration, 14855)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -38,6 +38,7 @@ from youtube_dl.utils import (
    xpath_with_ns,
    parse_iso8601,
    strip_jsonp,
    uppercase_escape,
 )
 if sys.version_info < (3, 0):
@@ -279,6 +280,9 @@ class TestUtil(unittest.TestCase):
        d = json.loads(stripped)
        self.assertEqual(d, [{"id": "532cb", "x": 3}])
    def test_uppercase_escpae(self):
        self.assertEqual(uppercase_escape(u'aä'), u'aä')
        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -286,6 +286,9 @@ class YoutubeDL(object):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)
    def _write_string(self, s, out=None):
        """Write s to out via write_string, passing the 'encoding' param."""
        encoding = self.params.get('encoding')
        write_string(s, out=out, encoding=encoding)
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
@@ -295,7 +298,7 @@ class YoutubeDL(object):
            terminator = ['\n', ''][skip_eol]
            output = message + terminator
-            write_string(output, self._screen_file)
+            self._write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
@@ -305,7 +308,7 @@ class YoutubeDL(object):
        else:
            message = self._bidi_workaround(message)
            output = message + '\n'
-            write_string(output, self._err_file)
+            self._write_string(output, self._err_file)
    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
@@ -315,21 +318,21 @@ class YoutubeDL(object):
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
-            write_string('\033]0;%s\007' % message, self._screen_file)
+            self._write_string('\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if 'TERM' in os.environ:
            # Save the title on stack
-            write_string('\033[22;0t', self._screen_file)
+            self._write_string('\033[22;0t', self._screen_file)
    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if 'TERM' in os.environ:
            # Restore the title from stack
-            write_string('\033[23;0t', self._screen_file)
+            self._write_string('\033[23;0t', self._screen_file)
    def __enter__(self):
        self.save_console_title()
@@ -1211,9 +1214,16 @@ class YoutubeDL(object):
        if not self.params.get('verbose'):
            return
-        write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
+        write_string(
-                 (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
+            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
-        write_string('[debug] youtube-dl version ' + __version__ + '\n')
+                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                sys.stdout.encoding,
                self.get_encoding()),
            encoding=None
        )
        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
@@ -1222,20 +1232,20 @@ class YoutubeDL(object):
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
-                write_string('[debug] Git HEAD: ' + out + '\n')
+                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except:
            try:
                sys.exc_clear()
            except:
                pass
-        write_string('[debug] Python version %s - %s' %
+        self._write_string('[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + '\n')
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
-        write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
    def _setup_opener(self):
        timeout_val = self.params.get('socket_timeout')
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -52,6 +52,7 @@ __authors__  = (
    'Juan C. Olivares',
    'Mattias Harrysson',
    'phaer',
    'Sainyam Kapoor',
 )
 __license__ = 'Public Domain'
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -32,6 +32,7 @@ from .canal13cl import Canal13clIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
@@ -62,6 +63,7 @@ from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
@@ -156,6 +158,7 @@ from .mofosex import MofosexIE
 from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motorsport import MotorsportIE
 from .movshare import MovShareIE
 from .mtv import (
    MTVIE,
    MTVIggyIE,
@@ -205,6 +208,7 @@ from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE
 from .rutube import (
    RutubeIE,
    RutubeChannelIE,
@@ -276,6 +280,7 @@ from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .videoweed import VideoWeedIE
 from .vimeo import (
    VimeoIE,
    VimeoChannelIE,
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -0,0 +1,87 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 import json
 from .common import InfoExtractor
 class CBSNewsIE(InfoExtractor):
    """Information extractor for cbsnews.com pages.

    The page embeds a JSON blob in a ``data-video-info`` or
    ``data-video-player-options`` attribute; title, duration, thumbnail
    and the per-format media URIs are all read from that blob.
    """

    IE_DESC = 'CBS News'
    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'

    _TESTS = [
        {
            'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
            'info_dict': {
                'id': 'tesla-and-spacex-elon-musks-industrial-empire',
                'ext': 'flv',
                'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
                'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
                'duration': 791,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
            'info_dict': {
                'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
                'ext': 'flv',
                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
                'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg',
                'duration': 205,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, duration, formats) for url."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        raw_json = self._html_search_regex(
            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
            page, 'video JSON info')
        video_info = json.loads(raw_json)

        # The metadata is either nested under 'item' or is the blob itself.
        if 'item' in video_info:
            item = video_info['item']
        else:
            item = video_info

        formats = []
        for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
            uri = item.get('media' + format_id + 'URI')
            if uri:
                fmt = {
                    'url': uri,
                    'format_id': format_id,
                }
                if uri.startswith('rtmp'):
                    # RTMP streams need the extra connection parameters.
                    rtmp_options = {
                        'app': 'ondemand?auth=cbs',
                        'play_path': 'mp4:' + uri.split('<break>')[-1],
                        'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
                        'page_url': 'http://www.cbsnews.com',
                        'ext': 'flv',
                    }
                    fmt.update(rtmp_options)
                elif uri.endswith('.m3u8'):
                    fmt['ext'] = 'mp4'
                formats.append(fmt)

        return {
            'id': video_id,
            'title': item.get('articleTitle') or item.get('hed'),
            'thumbnail': item.get('mediaImage') or item.get('thumbnail'),
            'duration': item.get('duration'),
            'formats': formats,
        }
--- a/youtube_dl/extractor/divxstage.py
+++ b/youtube_dl/extractor/divxstage.py
@@ -0,0 +1,27 @@
 from __future__ import unicode_literals
 from .novamov import NovaMovIE
 class DivxStageIE(NovaMovIE):
    """DivxStage extractor.

    Subclasses NovaMovIE without overriding any method; only the host,
    URL pattern and page-scraping regexes are specialised here.
    """
    IE_NAME = 'divxstage'
    IE_DESC = 'DivxStage'
    # Raw string: '\.' is a regex escape, not a string escape. A non-raw
    # literal yields the same value but is an invalid escape sequence that
    # newer Python versions warn about.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'divxstage\.(?:eu|net|ch|co|at|ag)'}
    # Canonical host used when building the video page URL.
    _HOST = 'www.divxstage.eu'
    # Page marker indicating the video has been removed.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
    _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'
    _TEST = {
        'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
        'md5': '63969f6eb26533a1968c4d325be63e72',
        'info_dict': {
            'id': '57f238e2e5e01',
            'ext': 'flv',
            'title': 'youtubedl test video',
            'description': 'This is a test video for youtubedl.',
        }
    }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -184,6 +184,17 @@ class GenericIE(InfoExtractor):
                'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
            }
        },
        # Embedded Ustream video
        {
            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
            'md5': '27b99cdb639c9b12a79bca876a073417',
            'info_dict': {
                'id': '45734260',
                'ext': 'flv',
                'uploader': 'AU SPA:  The NSA and Privacy',
                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
            }
        },
        # nowvideo embed hidden behind percent encoding
        {
            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@@ -500,17 +511,18 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Mpora')
-        # Look for embedded NovaMov player
+        # Look for embedded NovaMov-based player
        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
+            r'''(?x)<iframe[^>]+?src=(["\'])
                    (?P<url>http://(?:(?:embed|www)\.)?
                        (?:novamov\.com|
                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
                           videoweed\.(?:es|com)|
                           movshare\.(?:net|sx|ag)|
                           divxstage\.(?:eu|net|ch|co|at|ag))
                        /embed\.php.+?)\1''', webpage)
        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'NovaMov')
+            return self.url_result(mobj.group('url'))
        # Look for embedded NowVideo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'NowVideo')
        # Look for embedded Facebook player
        mobj = re.search(
@@ -556,6 +568,12 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')
        # Look for embedded Ustream videos
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Ustream')
        # Look for embedded arte.tv player
        mobj = re.search(
            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@@ -1,9 +1,12 @@
 from __future__ import unicode_literals
 import json
 import os
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_str,
    ExtractorError,
    formatSeconds,
 )
@@ -24,34 +27,31 @@ class JustinTVIE(InfoExtractor):
        /?(?:\#.*)?$
        """
    _JUSTIN_PAGE_LIMIT = 100
-    IE_NAME = u'justin.tv'
+    IE_NAME = 'justin.tv'
    IE_DESC = 'justin.tv and twitch.tv'
    _TEST = {
-        u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
+        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
-        u'file': u'296128360.flv',
+        'md5': 'ecaa8a790c22a40770901460af191c9a',
-        u'md5': u'ecaa8a790c22a40770901460af191c9a',
+        'info_dict': {
-        u'info_dict': {
+            'id': '296128360',
-            u"upload_date": u"20110927", 
+            'ext': 'flv',
-            u"uploader_id": 25114803, 
+            'upload_date': '20110927',
-            u"uploader": u"thegamedevhub", 
+            'uploader_id': 25114803,
-            u"title": u"Beginner Series - Scripting With Python Pt.1"
+            'uploader': 'thegamedevhub',
            'title': 'Beginner Series - Scripting With Python Pt.1'
        }
    }
    def report_download_page(self, channel, offset):
        """Report attempt to download a single page of videos."""
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        info_json = self._download_webpage(url, video_id,
-                                           u'Downloading video info JSON',
+                                           'Downloading video info JSON',
-                                           u'unable to download video info JSON')
+                                           'unable to download video info JSON')
        response = json.loads(info_json)
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
-            raise ExtractorError(u'Justin.tv API: %s' % error_text)
+            raise ExtractorError('Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
@@ -62,7 +62,7 @@ class JustinTVIE(InfoExtractor):
                video_id = clip['id']
                video_title = clip.get('title', video_id)
                info.append({
-                    'id': video_id,
+                    'id': compat_str(video_id),
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
@@ -74,8 +74,6 @@ class JustinTVIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)
        api_base = 'http://api.justin.tv'
        paged = False
@@ -89,40 +87,41 @@ class JustinTVIE(InfoExtractor):
            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if not m:
-                raise ExtractorError(u'Cannot find archive of a chapter')
+                raise ExtractorError('Cannot find archive of a chapter')
            archive_id = m.group(1)
            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            doc = self._download_xml(api, chapter_id,
+            doc = self._download_xml(
-                                             note=u'Downloading chapter information',
+                api, chapter_id,
-                                             errnote=u'Chapter information download failed')
+                note='Downloading chapter information',
                errnote='Chapter information download failed')
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
-                raise ExtractorError(u'Could not find chapter in chapter information')
+                raise ExtractorError('Could not find chapter in chapter information')
            video_url = a.find('./video_file_url').text
-            video_ext = video_url.rpartition('.')[2] or u'flv'
+            video_ext = video_url.rpartition('.')[2] or 'flv'
-            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
+            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
-            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
+            chapter_info = self._download_json(
-                                   note='Downloading chapter metadata',
+                chapter_api_url, 'c' + chapter_id,
-                                   errnote='Download of chapter metadata failed')
+                note='Downloading chapter metadata',
-            chapter_info = json.loads(chapter_info_json)
+                errnote='Download of chapter metadata failed')
            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)
            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
-            #video_url += u'?start=' + TODO:start_timestamp
+            #video_url += '?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
-            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
+            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
-                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
+                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
            info = {
-                'id': u'c' + chapter_id,
+                'id': 'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
@@ -131,14 +130,12 @@ class JustinTVIE(InfoExtractor):
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
-            return [info]
+            return info
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id
-        self.report_extraction(video_id)
+        entries = []
        info = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
@@ -146,8 +143,12 @@ class JustinTVIE(InfoExtractor):
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
-            info.extend(page_info)
+            entries.extend(page_info)
            if not paged or page_count != limit:
                break
            offset += limit
-        return info
+        return {
            '_type': 'playlist',
            'id': video_id,
            'entries': entries,
        }
--- a/youtube_dl/extractor/motorsport.py
+++ b/youtube_dl/extractor/motorsport.py
@@ -44,7 +44,7 @@ class MotorsportIE(InfoExtractor):
        e = compat_str(int(time.time()) + 24 * 60 * 60)
        base_video_url = params['location'] + '?e=' + e
        s = 'h3hg713fh32'
-        h = hashlib.md5(s + base_video_url).hexdigest()
+        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
        video_url = base_video_url + '&h=' + h
        uploader = self._html_search_regex(
--- a/youtube_dl/extractor/movshare.py
+++ b/youtube_dl/extractor/movshare.py
@@ -0,0 +1,27 @@
 from __future__ import unicode_literals
 from .novamov import NovaMovIE
 class MovShareIE(NovaMovIE):
    """MovShare extractor.

    Subclasses NovaMovIE without overriding any method; only the host,
    URL pattern and page-scraping regexes are specialised here.
    """
    IE_NAME = 'movshare'
    IE_DESC = 'MovShare'
    # Raw string: '\.' is a regex escape, not a string escape. A non-raw
    # literal yields the same value but is an invalid escape sequence that
    # newer Python versions warn about.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'movshare\.(?:net|sx|ag)'}
    # Canonical host used when building the video page URL.
    _HOST = 'www.movshare.net'
    # Page marker indicating the video has been removed.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
    _TEST = {
        'url': 'http://www.movshare.net/video/559e28be54d96',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': '559e28be54d96',
            'ext': 'flv',
            'title': 'dissapeared image',
            'description': 'optical illusion  dissapeared image  magic illusion',
        }
    }
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor):
    IE_NAME = 'novamov'
    IE_DESC = 'NovaMov'
-    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
+    _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
    _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
    _HOST = 'www.novamov.com'
@@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')
        page = self._download_webpage(
            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
        if re.search(self._FILE_DELETED_REGEX, page) is not None:
-            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
        api_response = self._download_webpage(
--- a/youtube_dl/extractor/nowvideo.py
+++ b/youtube_dl/extractor/nowvideo.py
@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
    IE_NAME = 'nowvideo'
    IE_DESC = 'NowVideo'
-    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}
    _HOST = 'www.nowvideo.ch'
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@@ -18,7 +18,7 @@ class Ro220IE(InfoExtractor):
        'md5': '03af18b73a07b4088753930db7a34add',
        'info_dict': {
            "title": "Luati-le Banii sez 4 ep 1",
-            "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+            "description": "re:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$",
        }
    }
--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@@ -35,13 +35,13 @@ class RTSIE(InfoExtractor):
        },
        {
            'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
-            'md5': 'c197f0b2421995c63a64cc73d800f42e',
+            'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
            'info_dict': {
-                'id': '5738317',
+                'id': '5624067',
                'ext': 'mp4',
-                'duration': 55,
+                'duration': 3720,
-                'title': 'Bande de lancement de Passe-moi les jumelles',
+                'title': 'Les yeux dans les cieux - Mon homard au Canada',
-                'description': '',
+                'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
                'uploader': 'Passe-moi les jumelles',
                'upload_date': '20140404',
                'timestamp': 1396635300,
@@ -98,17 +98,20 @@ class RTSIE(InfoExtractor):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
-        def download_json(video_id):
+        def download_json(internal_id):
            return self._download_json(
-                'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
+                'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
                video_id)
        all_info = download_json(video_id)
        # video_id extracted out of URL is not always a real id
        if 'video' not in all_info and 'audio' not in all_info:
            page = self._download_webpage(url, video_id)
-            video_id = self._html_search_regex(r'<(?:video|audio) data-id="(\d+)"', page, 'video id')
+            internal_id = self._html_search_regex(
-            all_info = download_json(video_id)
+                r'<(?:video|audio) data-id="([0-9]+)"', page,
                'internal video id')
            all_info = download_json(internal_id)
        info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -0,0 +1,84 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 import base64
 from .common import InfoExtractor
 from ..utils import (
    struct_unpack,
 )
 class RTVEALaCartaIE(InfoExtractor):
    """Extractor for "RTVE a la carta" video pages on www.rtve.es.

    Title and thumbnail come from a JSON config endpoint. The media URL is
    decoded by ``_decrypt_url`` from the body of a per-video ``.png``
    endpoint.
    """
    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
    }
    def _decrypt_url(self, png):
        """Decode the media URL hidden in the base64-encoded payload ``png``.

        The payload is base64-decoded and the data following the ``tEXt``
        marker is split at the first ``#`` into an alphabet section and a
        URL section. The URL section is a stream of digits that index into
        the alphabet. Returns the decoded URL string.
        """
        encrypted_data = base64.b64decode(png)
        # NOTE(review): find() returns -1 when the marker is absent; that
        # case is not handled here.
        text_index = encrypted_data.find(b'tEXt')
        # Start 4 bytes before the marker so the slice begins with the
        # length field (looks like the PNG chunk layout: length, type, data
        # -- confirm against the PNG spec).
        text_chunk = encrypted_data[text_index-4:]
        # '!I' is the struct format for a network-order unsigned int;
        # struct_unpack is presumably a wrapper over struct.unpack -- confirm in utils.
        length = struct_unpack('!I', text_chunk[:4])[0]
        # Use bytearray to get integers when iterating in both python 2.x and 3.x
        # Skip the 4-byte length and the 4-byte marker, then take `length` bytes.
        data = bytearray(text_chunk[8:8+length])
        # Drop zero bytes and turn the remaining byte values into characters.
        data = [chr(b) for b in data if b != 0]
        hash_index = data.index('#')
        alphabet_data = data[:hash_index]
        url_data = data[hash_index+1:]
        # Build the alphabet: keep one character, then skip a number of
        # characters that cycles 1, 2, 3, 0, 1, ... (e is the cycle state,
        # d counts down the characters still to skip).
        alphabet = []
        e = 0
        d = 0
        for l in alphabet_data:
            if d == 0:
                alphabet.append(l)
                d = e = (e + 1) % 4
            else:
                d -= 1
        # Decode the URL: each output character is alphabet[tens*10 + units].
        # After reading the tens digit (f == 0), `e` characters are skipped
        # before the units digit is read; the skip count cycles 3, 0, 1, 2, ...
        # driven by the counter b.
        url = ''
        f = 0
        e = 3
        b = 1
        for letter in url_data:
            if f == 0:
                # Tens digit of the alphabet index.
                l = int(letter)*10
                f = 1
            else:
                if e == 0:
                    # Units digit: complete the index and emit one character.
                    l += int(letter)
                    url += alphabet[l]
                    e = (b + 3) % 4
                    f = 0
                    b += 1
                else:
                    # Filler character between the two digits; ignore it.
                    e -= 1
        return url
    def _real_extract(self, url):
        """Return the info dict (id, title, url, thumbnail) for ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Metadata: first entry of page.items in the JSON config.
        info = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)['page']['items'][0]
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
        # The response body is fetched as text and handed to _decrypt_url,
        # which base64-decodes it.
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        video_url = self._decrypt_url(png)
        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            'thumbnail': info['image'],
        }
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -9,8 +9,18 @@ from ..utils import (
 class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
+    _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
-    _TEST = {
+    _TESTS = [
    {
        'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
        'file': '80187.mp4',
        'md5': '3f7746aa0dc86de18df7539903d399ea',
        'info_dict': {
            'title': 'Conan Becomes A Mary Kay Beauty Consultant',
            'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
        }
    },
    {
        'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
        'file': '19705.mp4',
        'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
@@ -19,22 +29,23 @@ class TeamcocoIE(InfoExtractor):
            "title": "Louis C.K. Interview Pt. 1 11/3/11"
        }
    }
    ]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)
-        video_id = self._html_search_regex(
+        display_id = mobj.group('display_id')
-            r'<article class="video" data-id="(\d+?)"',
+        webpage = self._download_webpage(url, display_id)
-            webpage, 'video id')
+        
-
+        video_id = mobj.group("video_id")
-        self.report_extraction(video_id)
+        if not video_id:
            video_id = self._html_search_regex(
                r'<article class="video" data-id="(\d+?)"',
                webpage, 'video id')
        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
-        data = self._download_xml(data_url, video_id, 'Downloading data webpage')
+        data = self._download_xml(
            data_url, display_id, 'Downloading data webpage')
        qualities = ['500k', '480p', '1000k', '720p', '1080p']
        formats = []
@@ -69,6 +80,7 @@ class TeamcocoIE(InfoExtractor):
        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -37,6 +37,7 @@ class TEDIE(SubtitlesInfoExtractor):
                'consciousness, but that half the time our brains are '
                'actively fooling us.'),
            'uploader': 'Dan Dennett',
            'width': 854,
        }
    }, {
        'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -50,10 +51,10 @@ class TEDIE(SubtitlesInfoExtractor):
        }
    }]
-    _FORMATS_PREFERENCE = {
+    _NATIVE_FORMATS = {
-        'low': 1,
+        'low': {'preference': 1, 'width': 320, 'height': 180},
-        'medium': 2,
+        'medium': {'preference': 2, 'width': 512, 'height': 288},
-        'high': 3,
+        'high': {'preference': 3, 'width': 854, 'height': 480},
    }
    def _extract_info(self, webpage):
@@ -98,12 +99,14 @@ class TEDIE(SubtitlesInfoExtractor):
        talk_info = self._extract_info(webpage)['talks'][0]
        formats = [{
            'ext': 'mp4',
            'url': format_url,
            'format_id': format_id,
            'format': format_id,
            'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
        } for (format_id, format_url) in talk_info['nativeDownloads'].items()]
        for f in formats:
            finfo = self._NATIVE_FORMATS.get(f['format_id'])
            if finfo:
                f.update(finfo)
        self._sort_formats(formats)
        video_id = compat_str(talk_info['id'])
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -11,7 +11,7 @@ from ..utils import (
 class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
+    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
    IE_NAME = 'ustream'
    _TEST = {
        'url': 'http://www.ustream.tv/recorded/20274954',
@@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor):
    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        if m.group('type') == 'embed':
            video_id = m.group('videoID')
            webpage = self._download_webpage(url, video_id)
            desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
            return self.url_result(desktop_url, 'Ustream')
        video_id = m.group('videoID')
        video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
--- a/youtube_dl/extractor/videoweed.py
+++ b/youtube_dl/extractor/videoweed.py
@@ -0,0 +1,26 @@
 from __future__ import unicode_literals
 from .novamov import NovaMovIE
 class VideoWeedIE(NovaMovIE):
    # Extractor variant for videoweed.es / videoweed.com.  It defines no
    # methods of its own: only the site-specific constants below are
    # overridden and all extraction logic is inherited from NovaMovIE.
    IE_NAME = 'videoweed'
    IE_DESC = 'VideoWeed'
    # The host pattern is a raw string: '\.' is not a valid string escape and
    # raises an invalid-escape warning on newer Python versions.  The
    # resulting value is unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}
    # NOTE(review): presumably the host NovaMovIE uses when building its
    # requests -- confirm against novamov.py.
    _HOST = 'www.videoweed.es'
    # Site-specific patterns; how they are applied is defined by NovaMovIE.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
    _TEST = {
        'url': 'http://www.videoweed.es/file/b42178afbea14',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': 'b42178afbea14',
            'ext': 'flv',
            'title': 'optical illusion  dissapeared image magic illusion',
            'description': ''
        },
    }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 import calendar
 import codecs
 import contextlib
 import ctypes
 import datetime
@@ -909,25 +910,81 @@ def platform_name():
    return res
-def write_string(s, out=None):
def _windows_write_string(s, out):
    """Write s to the Windows console behind out by calling WriteConsoleW.

    Returns True if the string was written using special methods,
    False if it has yet to be written out.  False is returned when out has
    no usable file descriptor, when the descriptor is neither 1 (stdout)
    nor 2 (stderr), or when the matching standard handle is not a console.

    Raises OSError if WriteConsoleW fails or writes nothing.
    """
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes
    import io

    # C runtime file descriptor -> identifier understood by GetStdHandle
    WIN_OUTPUT_IDS = {
        1: -11,  # STD_OUTPUT_HANDLE
        2: -12,  # STD_ERROR_HANDLE
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # Virtual / in-memory streams have no OS-level descriptor
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        # A console is a character device for which GetConsoleMode succeeds
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character outside the Basic Multilingual Plane,
        # or len(text) when there is none
        for pos, char in enumerate(text):
            if ord(char) > 0xffff:
                return pos
        return len(text)

    # WriteConsoleW counts UTF-16 code units.  Write runs of BMP characters
    # (one unit each) in chunks of at most 1024, and every non-BMP character
    # (two units) on its own, so the unit count reported back can be mapped
    # onto the number of characters of s that were consumed.
    while s:
        count = min(next_nonbmp_pos(s), 1024)
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0 or written.value == 0:
            # No progress would otherwise mean looping forever
            raise OSError('Failed to write string')
        if not count:
            # A single non-BMP character (surrogate pair) was just written
            s = s[1:]
        else:
            # Drop what was actually written and continue with the rest
            s = s[written.value:]
    return True
 def write_string(s, out=None, encoding=None):
    # Write the text s to out (sys.stderr when out is None) and flush it.
    # encoding, when given, overrides the codec used to encode s in the
    # byte-oriented branches below.
    # NOTE(review): this span is a rendered diff hunk.  The lines prefixed
    # with '-' and the trailing `except UnicodeEncodeError` handler appear to
    # belong to the removed side (their `try:` is marked '-'), so the text is
    # not valid Python as shown -- confirm against the real utils.py.
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str
    # The Windows console fast path is only attempted when no explicit
    # encoding was requested; a True result means s was already written.
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return
    # Binary-mode stream (or any stream on Python 2): write encoded bytes,
    # silently dropping characters the codec cannot represent ('ignore').
    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
-        s = s.encode(preferredencoding(), 'ignore')
+        byt = s.encode(encoding or preferredencoding(), 'ignore')
-    try:
+        out.write(byt)
    # Text stream exposing an underlying byte buffer: encode manually and
    # write to the buffer, preferring encoding, then the stream's own
    # encoding, then preferredencoding().
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    # Anything else: hand the text to the stream unchanged.
    else:
        out.write(s)
    except UnicodeEncodeError:
        # In Windows shells, this can fail even when the codec is just charmap!?
        # See https://wiki.python.org/moin/PrintFails#Issue
        if sys.platform == 'win32' and hasattr(out, 'encoding'):
            s = s.encode(out.encoding, 'ignore').decode(out.encoding)
            out.write(s)
        else:
            raise
    out.flush()
@@ -1263,9 +1320,11 @@ class PagedList(object):
 def uppercase_escape(s):
    # Replace every literal \UXXXXXXXX sequence in s (backslash, capital U,
    # eight hex digits) with the character it denotes, leaving all other text
    # untouched.  Returns the resulting string.
    # The '-' / '+' prefixed lines are the two sides of a diff: the '+' form
    # decodes through the codecs decoder, which returns a (text, consumed)
    # tuple (hence the [0]) and does not rely on the matched text having a
    # .decode() method.
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
-        lambda m: m.group(0).decode('unicode-escape'), s)
+        lambda m: unicode_escape(m.group(0))[0],
        s)
 try:
    struct.pack(u'!I', 0)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.04.04.5'
+__version__ = '2014.04.07.3'
Author	SHA1	Message	Date
Philipp Hagemeister	a5863bdf33	release 2014.04.07.3	2014-04-07 22:48:45 +02:00
Philipp Hagemeister	b58ddb32ba	[utils] Completely rewrite Windows output (Fixes #2672 )	2014-04-07 22:48:13 +02:00
Philipp Hagemeister	b9e12a8140	release 2014.04.07.2	2014-04-07 21:41:20 +02:00
Philipp Hagemeister	104aa7388a	Use our own encoding when writing strings	2014-04-07 21:40:34 +02:00
Philipp Hagemeister	c3855d28b0	Merge branch 'master' of github.com:rg3/youtube-dl	2014-04-07 19:57:51 +02:00
Philipp Hagemeister	734f90bb41	Use --encoding when outputting	2014-04-07 19:57:42 +02:00
Jaime Marquínez Ferrándiz	91a6addeeb	Add support for rtve.es/alacarta	2014-04-07 17:30:32 +02:00
Philipp Hagemeister	9afb76c5ad	release 2014.04.07.1	2014-04-07 15:28:55 +02:00
Philipp Hagemeister	dfb2cb5cfd	[teamcoco] Simplify ID management (Closes #2715 )	2014-04-07 15:25:35 +02:00
Philipp Hagemeister	650d688d10	release 2014.04.07	2014-04-07 13:11:37 +02:00
Philipp Hagemeister	0ba77818f3	[ted] Add width and height (Fixes #2716 )	2014-04-07 13:11:30 +02:00
Sergey M․	09baa7da7e	[rts] Update test	2014-04-07 00:34:23 +07:00
Sergey M․	85e787f51d	[cbsnews] Add support for cbsnews.com (Closes #2691 )	2014-04-06 06:03:58 +07:00
Philipp Hagemeister	2a9e1e453a	Merge branch 'master' of github.com:rg3/youtube-dl	2014-04-05 20:05:47 +02:00
Philipp Hagemeister	ee1e199685	[justin.tv] Modernize (Fixes #2705 )	2014-04-05 17:56:36 +02:00
Sergey M․	17c5a00774	[novamov] Simplify	2014-04-05 19:36:22 +07:00
Sergey M․	15c0e8e7b2	[generic] Generalize novamov based embeds	2014-04-05 17:20:05 +07:00
Sergey M․	cca37fba48	[divxstage] Fix typo in IE_NAME	2014-04-05 17:15:43 +07:00
Sergey M․	9d0993ec4a	[movshare] Support more domains	2014-04-05 17:00:18 +07:00
Sergey M․	342f33bf9e	[divxstage] Support more domains	2014-04-05 16:50:05 +07:00
Sergey M․	7cd3bc5f99	[nowvideo] Support more domains	2014-04-05 16:38:57 +07:00
Sergey M․	931055e6cb	[videoweed] Revert _FILE_DELETED_REGEX	2014-04-05 16:32:14 +07:00
Sergey M․	d0e4cf82f1	[movshare] Add _FILE_DELETED_REGEX	2014-04-05 16:31:38 +07:00
Sergey M․	6f88df2c57	[divxstage] Add support for divxstage.eu	2014-04-05 16:29:44 +07:00
Sergey M․	4479bf2762	[videoweed] Simplify	2014-04-05 16:09:28 +07:00
Sergey M․	1ff7c0f7d8	[movshare] Add support for movshare.net	2014-04-05 16:09:03 +07:00
Sergey M․	610e47c87e	Credit @sainyamkapoor for videoweed extractor	2014-04-05 15:53:50 +07:00
Sergey M․	50f566076f	[generic] Add support for videoweed embeds	2014-04-05 15:49:45 +07:00
Sergey M․	92810ff497	[nowvideo] Improve _VALID_URL	2014-04-05 15:35:21 +07:00
Sergey M․	60ccc59a1c	[novamov] Improve _VALID_URL	2014-04-05 15:34:54 +07:00
Sergey M․	91745595d3	[videoweed] Simplify	2014-04-05 15:32:55 +07:00
Sainyam Kapoor	d6e40507d0	[videoweed]Cleanup	2014-04-05 10:53:22 +05:30
Sainyam Kapoor	deed48b472	[Videoweed] Added support for videoweed.	2014-04-05 10:40:03 +05:30
Philipp Hagemeister	e4d41bfca5	Merge pull request #2696 from anovicecodemonkey/support-ustream-embeds [UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)	2014-04-04 23:33:08 +02:00
Philipp Hagemeister	a355b70f27	[cspan] Do not test number of playlist entries Apparently, CSpan switches between single-file and multiple-file results. Either one is fine as long as we get the full four hours.	2014-04-04 23:16:22 +02:00
Philipp Hagemeister	f8514f6186	[rts] Use visible id in file names Maybe the internal ID is more precise, but it's totally confusing, and the obvious ID still allows a google search.	2014-04-04 23:13:55 +02:00
Philipp Hagemeister	e09b8fcd9d	[ro220] Make test case more flexible Either one or two spaces is fine here.	2014-04-04 23:08:33 +02:00
Philipp Hagemeister	7d1b527ff9	[motorsport] Fix on Python 3	2014-04-04 23:06:27 +02:00
Philipp Hagemeister	f943c7b622	release 2014.04.04.7	2014-04-04 23:01:45 +02:00
Philipp Hagemeister	676eb3f2dd	Fix unicode_escape (Fixes #2695 )	2014-04-04 23:00:51 +02:00
Philipp Hagemeister	98b7cf1ace	release 2014.04.04.6	2014-04-04 22:48:35 +02:00
Philipp Hagemeister	c465afd736	[teamcoco] Fix regex in 2.6 (#2700 ) The re engine does not want to repeat an empty string, for fear that something like (.) could be matching the tokens ... "" "" "" "" "" "" Of course, that's harmless with a question mark, although still somewhat strange.	2014-04-04 22:46:47 +02:00
Philipp Hagemeister	b84d6e7fc4	Merge remote-tracking branch 'AGSPhoenix/teamcoco-fix'	2014-04-04 22:44:49 +02:00
AGSPhoenix	fa387d2d99	Revert "Workaround for regex engine limitation" This reverts commit `6d0d573eca`.	2014-04-04 15:37:49 -04:00
AGSPhoenix	6d0d573eca	Workaround for regex engine limitation	2014-04-04 15:25:28 -04:00
AGSPhoenix	bb799e811b	Add a test for the new URL pages Add a test for the pages with the video_id in the URL.	2014-04-04 13:52:35 -04:00
AGSPhoenix	04ee53eca1	Support TeamCoco URLs with video_id in the title If the URL has the video_id in it, use that since the current method of finding the id breaks on those pages. Fixes 2698.	2014-04-04 13:42:34 -04:00
anovicecodemonkey	ca6aada48e	Fix _TEST for Ustream embed URLs	2014-04-05 03:26:29 +10:30
anovicecodemonkey	5c38625259	[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694 )	2014-04-05 00:53:09 +10:30
`@@ -1,2 +1,2 @@`

	`__version__ = '2014.04.04.5'`	`__version__ = '2014.04.07.3'`