release 2013.12.16.6

Merge branch 'master' of github.com:rg3/youtube-dl
Add support for embedded vevo player (Fixes #1957)
2026-05-07 05:48:16 +00:00 · 2013-12-16 21:46:16 +01:00 · 2013-12-16 21:46:11 +01:00 · 2013-12-16 21:45:21 +01:00 · 2013-12-16 21:42:41 +01:00 · 2013-12-16 21:34:47 +01:00
27 changed files with 801 additions and 145 deletions
--- a/README.md
+++ b/README.md
@@ -56,6 +56,10 @@ which means you can modify it, redistribute it or use it however you like.
    --date DATE                download only videos uploaded in this date
    --datebefore DATE          download only videos uploaded before this date
    --dateafter DATE           download only videos uploaded after this date
    --min-views COUNT          Do not download any videos with less than COUNT
                               views
    --max-views COUNT          Do not download any videos with more than COUNT
                               views
    --no-playlist              download only the currently playing video
    --age-limit YEARS          download only videos suitable for the given age
    --download-archive FILE    Download only videos not listed in the archive
@@ -127,6 +131,7 @@ which means you can modify it, redistribute it or use it however you like.
    --get-id                   simulate, quiet but print id
    --get-thumbnail            simulate, quiet but print thumbnail URL
    --get-description          simulate, quiet but print video description
    --get-duration             simulate, quiet but print video length
    --get-filename             simulate, quiet but print output filename
    --get-format               simulate, quiet but print output format
    -j, --dump-json            simulate, quiet but print JSON information
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import get_testcases
 from youtube_dl.extractor import (
    FacebookIE,
    gen_extractors,
    JustinTVIE,
    YoutubeIE,
@@ -87,12 +88,15 @@ class TestAllURLsMatching(unittest.TestCase):
        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
    def test_facebook_matching(self):
        self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
    def test_no_duplicates(self):
        ies = gen_extractors()
        for tc in get_testcases():
            url = tc['url']
            for ie in ies:
-                if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
+                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
                    self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
                else:
                    self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -34,6 +34,7 @@ from .utils import (
    encodeFilename,
    ExtractorError,
    format_bytes,
    formatSeconds,
    get_term_width,
    locked_file,
    make_HTTPS_handler,
@@ -94,6 +95,7 @@ class YoutubeDL(object):
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
@@ -127,7 +129,16 @@ class YoutubeDL(object):
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
-    download_archive:   File name of a file where all downloads are recorded.
+    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
@@ -355,22 +366,6 @@ class YoutubeDL(object):
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)
    def report_writesubtitles(self, sub_filename):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
    def report_writeinfojson(self, infofn):
        """ Report that the metadata file has been written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
    def report_writeannotations(self, annofn):
        """ Report that the annotations file has been written. """
        self.to_screen(u'[info] Writing video annotations to: ' + annofn)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
@@ -415,13 +410,14 @@ class YoutubeDL(object):
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        video_title = info_dict.get('title', info_dict.get('id', u'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
-                    return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+                    return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
@@ -430,14 +426,21 @@ class YoutubeDL(object):
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
-                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+                return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
                return u'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
-            return (u'%s has already been recorded in archive'
+            return u'%s has already been recorded in archive' % video_title
                    % info_dict.get('title', info_dict.get('id', u'video')))
        return None
    @staticmethod
@@ -554,16 +557,16 @@ class YoutubeDL(object):
            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
-            playlistend = self.params.get('playlistend', -1)
+            playlistend = self.params.get('playlistend', None)
-
+            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
-                entries = ie_result['entries'][playliststart:]
+                playlistend = None
            else:
                entries = ie_result['entries'][playliststart:playlistend]
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
-            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
+            self.to_screen(
                u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
            for i, entry in enumerate(entries, 1):
@@ -748,6 +751,8 @@ class YoutubeDL(object):
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
@@ -770,28 +775,34 @@ class YoutubeDL(object):
            return
        if self.params.get('writedescription', False):
-            try:
+            descfn = filename + u'.description'
-                descfn = filename + u'.description'
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
-                self.report_writedescription(descfn)
+                self.to_screen(u'[info] Video description is already present')
-                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+            else:
-                    descfile.write(info_dict['description'])
+                try:
-            except (KeyError, TypeError):
+                    self.to_screen(u'[info] Writing video description to: ' + descfn)
-                self.report_warning(u'There\'s no description to write.')
+                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
-            except (OSError, IOError):
+                        descfile.write(info_dict['description'])
-                self.report_error(u'Cannot write description file ' + descfn)
+                except (KeyError, TypeError):
-                return
+                    self.report_warning(u'There\'s no description to write.')
                except (OSError, IOError):
                    self.report_error(u'Cannot write description file ' + descfn)
                    return
        if self.params.get('writeannotations', False):
-            try:
+            annofn = filename + u'.annotations.xml'
-                annofn = filename + u'.annotations.xml'
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
-                self.report_writeannotations(annofn)
+                self.to_screen(u'[info] Video annotations are already present')
-                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+            else:
-                    annofile.write(info_dict['annotations'])
+                try:
-            except (KeyError, TypeError):
+                    self.to_screen(u'[info] Writing video annotations to: ' + annofn)
-                self.report_warning(u'There are no annotations to write.')
+                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
-            except (OSError, IOError):
+                        annofile.write(info_dict['annotations'])
-                self.report_error(u'Cannot write annotations file: ' + annofn)
+                except (KeyError, TypeError):
-                return
+                    self.report_warning(u'There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error(u'Cannot write annotations file: ' + annofn)
                    return
        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])
@@ -807,38 +818,48 @@ class YoutubeDL(object):
                    continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
-                    self.report_writesubtitles(sub_filename)
+                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
-                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+                        self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
-                            subfile.write(sub)
+                    else:
                        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                                subfile.write(sub)
                except (OSError, IOError):
                    self.report_error(u'Cannot write subtitles file ' + descfn)
                    return
        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + u'.info.json'
-            self.report_writeinfojson(infofn)
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
-            try:
+                self.to_screen(u'[info] Video description metadata is already present')
-                json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
+            else:
-                write_json_file(json_info_dict, encodeFilename(infofn))
+                self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
-            except (OSError, IOError):
+                try:
-                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
+                    json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
-                return
+                    write_json_file(json_info_dict, encodeFilename(infofn))
                except (OSError, IOError):
                    self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                    return
        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
-                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
+                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
-                               (info_dict['extractor'], info_dict['id']))
+                    self.to_screen(u'[%s] %s: Thumbnail is already present' %
-                try:
+                                   (info_dict['extractor'], info_dict['id']))
-                    uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
+                else:
-                    with open(thumb_filename, 'wb') as thumbf:
+                    self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
-                        shutil.copyfileobj(uf, thumbf)
+                                   (info_dict['extractor'], info_dict['id']))
-                    self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
+                    try:
-                        (info_dict['extractor'], info_dict['id'], thumb_filename))
+                        uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
-                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                        with open(thumb_filename, 'wb') as thumbf:
-                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
+                            shutil.copyfileobj(uf, thumbf)
-                        (info_dict['thumbnail'], compat_str(err)))
+                        self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                            (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        self.report_warning(u'Unable to download thumbnail "%s": %s' %
                            (info_dict['thumbnail'], compat_str(err)))
        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -37,6 +37,7 @@ __authors__  = (
    'Anton Larionov',
    'Takuya Tsuchida',
    'Sergey M.',
    'Michael Orlitzky',
 )
 __license__ = 'Public Domain'
@@ -62,6 +63,7 @@ from .utils import (
    MaxDownloadsReached,
    preferredencoding,
    SameFileError,
    setproctitle,
    std_headers,
    write_string,
 )
@@ -196,10 +198,14 @@ def parseOpts(overrideArguments=None):
        help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
-    selection.add_option('--playlist-start',
+    selection.add_option(
-            dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
+        '--playlist-start',
-    selection.add_option('--playlist-end',
+        dest='playliststart', metavar='NUMBER', default=1, type=int,
-            dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
+        help='playlist video to start at (default is %default)')
    selection.add_option(
        '--playlist-end',
        dest='playlistend', metavar='NUMBER', default=None, type=int,
        help='playlist video to end at (default is last)')
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER',
@@ -210,6 +216,14 @@ def parseOpts(overrideArguments=None):
    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
    selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
    selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
    selection.add_option(
        '--min-views', metavar='COUNT', dest='min_views',
        default=None, type=int,
        help="Do not download any videos with less than COUNT views",)
    selection.add_option(
        '--max-views', metavar='COUNT', dest='max_views',
        default=None, type=int,
        help="Do not download any videos with more than COUNT views",)
    selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
                         help='download only videos suitable for the given age',
@@ -290,6 +304,9 @@ def parseOpts(overrideArguments=None):
    verbosity.add_option('--get-description',
            action='store_true', dest='getdescription',
            help='simulate, quiet but print video description', default=False)
    verbosity.add_option('--get-duration',
            action='store_true', dest='getduration',
            help='simulate, quiet but print video length', default=False)
    verbosity.add_option('--get-filename',
            action='store_true', dest='getfilename',
            help='simulate, quiet but print output filename', default=False)
@@ -460,12 +477,15 @@ def parseOpts(overrideArguments=None):
    return parser, opts, args
 def _real_main(argv=None):
    # Compatibility fixes for Windows
    if sys.platform == 'win32':
        # https://github.com/rg3/youtube-dl/issues/820
        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
    setproctitle(u'youtube-dl')
    parser, opts, args = parseOpts(argv)
    # Set user agent
@@ -560,18 +580,10 @@ def _real_main(argv=None):
        if numeric_buffersize is None:
            parser.error(u'invalid buffer size specified')
        opts.buffersize = numeric_buffersize
-    try:
+    if opts.playliststart <= 0:
-        opts.playliststart = int(opts.playliststart)
+        raise ValueError(u'Playlist start must be positive')
-        if opts.playliststart <= 0:
+    if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
-            raise ValueError(u'Playlist start must be positive')
+        raise ValueError(u'Playlist end must be greater than playlist start')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist start number specified')
    try:
        opts.playlistend = int(opts.playlistend)
        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
            raise ValueError(u'Playlist end must be greater than playlist start')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist end number specified')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
            parser.error(u'invalid audio format specified')
@@ -604,27 +616,30 @@ def _real_main(argv=None):
            or (opts.useid and u'%(id)s.%(ext)s')
            or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
            or u'%(title)s-%(id)s.%(ext)s')
-    if '%(ext)s' not in outtmpl and opts.extractaudio:
+    if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
        parser.error(u'Cannot download a video and extract audio into the same'
-                     u' file! Use "%%(ext)s" instead of %r' %
+                     u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
-                     determine_ext(outtmpl, u''))
+                     u' template'.format(outtmpl))
    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson
    ydl_opts = {
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'videopassword': opts.videopassword,
-        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
+        'quiet': (opts.quiet or any_printing),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forceduration': opts.getduration,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forcejson': opts.dumpjson,
        'simulate': opts.simulate,
-        'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
+        'skip_download': (opts.skip_download or opts.simulate or any_printing),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
@@ -668,6 +683,8 @@ def _real_main(argv=None):
        'keepvideo': opts.keepvideo,
        'min_filesize': opts.min_filesize,
        'max_filesize': opts.max_filesize,
        'min_views': opts.min_views,
        'max_views': opts.max_views,
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -13,6 +13,7 @@ from .arte import (
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
 from .breakcom import BreakIE
@@ -20,6 +21,8 @@ from .brightcove import BrightcoveIE
 from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .channel9 import Channel9IE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .clipsyndicate import ClipsyndicateIE
@@ -87,6 +90,7 @@ from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE, LivestreamOriginalIE
 from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE
@@ -111,9 +115,11 @@ from .orf import ORFIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
 from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
 from .pyvideo import PyvideoIE
 from .radiofrance import RadioFranceIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -266,20 +266,6 @@ class ArteTVDDCIE(ArteTVPlus7IE):
    IE_NAME = u'arte.tv:ddc'
    _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
    _TEST = {
        u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
        u'file': u'049881-009_PLUS7-D.flv',
        u'info_dict': {
            u'title': u'Mit offenen Karten',
            u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
            u'upload_date': u'20131207',
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }
    def _real_extract(self, url):
        video_id, lang = self._extract_url_info(url)
        if lang == 'folge':
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -0,0 +1,86 @@
 import datetime
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    remove_start,
 )
 class BlinkxIE(InfoExtractor):
     """Information extractor for blinkx.com "ce" video pages.

     Accepts either a ``blinkx.com/ce/<id>`` page URL (with or without the
     ``www.`` prefix) or the internal ``blinkx:<id>`` scheme. Metadata is
     fetched from the blinkx ``play_video`` JSON API and converted into an
     info dict containing formats, thumbnails and basic metadata.
     """

     # ``www.`` is optional: the original group had no quantifier, which made
     # the prefix mandatory and rejected bare ``blinkx.com`` URLs.
     _VALID_URL = r'^(?:https?://(?:www\.)?blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)'
     # Expose the name under ``IE_NAME`` like the other extractors do.
     IE_NAME = u'blinkx'
     # Kept so any code still reading the old private attribute keeps working.
     _IE_NAME = IE_NAME
     _TEST = {
         u'url': u'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
         u'file': u'8aQUy7GV.mp4',
         u'md5': u'2e9a07364af40163a908edbf10bb2492',
         u'info_dict': {
             u"title": u"Police Car Rolls Away",
             u"uploader": u"stupidvideos.com",
             u"upload_date": u"20131215",
             u"description": u"A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
             u"duration": 14.886,
             u"thumbnails": [{
                 "width": 100,
                 "height": 76,
                 "url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
             }],
         },
     }

     def _real_extract(self, url):
         """Return the info dict for the blinkx video referenced by *url*.

         The first eight characters of the (long) URL id are used as the
         short ``id``; the complete id is preserved under ``fullid``.
         """
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         display_id = video_id[:8]

         api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' +
                    u'video=%s' % video_id)
         data_json = self._download_webpage(api_url, display_id)
         data = json.loads(data_json)['api']['results'][0]

         # Interpret the epoch in UTC so the resulting date does not depend
         # on the timezone of the machine running the extractor.
         published = datetime.datetime.utcfromtimestamp(data['pubdate_epoch'])
         upload_date = published.strftime('%Y%m%d')

         duration = None
         thumbnails = []
         formats = []
         # ``media`` mixes thumbnails, the original upload and the encoded
         # variants; dispatch on each entry's ``type``.
         for media in data['media']:
             media_type = media['type']
             if media_type == 'jpg':
                 thumbnails.append({
                     'url': media['link'],
                     'width': int(media['w']),
                     'height': int(media['h']),
                 })
             elif media_type == 'original':
                 duration = media['d']
             elif media_type in ('flv', 'mp4'):
                 # Codec names arrive with an ``ff`` prefix that is dropped.
                 vcodec = remove_start(media['vcodec'], 'ff')
                 acodec = remove_start(media['acodec'], 'ff')
                 # The API values are divided by 1000 before being reported.
                 vbr = int(media['vbr']) // 1000
                 abr = int(media['abr']) // 1000
                 format_id = (u'%s-%sk-%s' %
                              (vcodec,
                               (int(media['vbr']) + int(media['abr'])) // 1000,
                               media['w']))
                 formats.append({
                     'format_id': format_id,
                     'url': media['link'],
                     'vcodec': vcodec,
                     'acodec': acodec,
                     'abr': abr,
                     'vbr': vbr,
                     'width': int(media['w']),
                     'height': int(media['h']),
                 })
         # Ascending order by width, then video bitrate, then audio bitrate.
         formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr']))

         return {
             'id': display_id,
             'fullid': video_id,
             'title': data['title'],
             'formats': formats,
             'uploader': data['channel_name'],
             'upload_date': upload_date,
             'description': data.get('description'),
             'thumbnails': thumbnails,
             'duration': duration,
         }
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -0,0 +1,30 @@
 import re
 from .common import InfoExtractor
 class CBSIE(InfoExtractor):
     """Information extractor for cbs.com show video pages.

     The page itself only provides the identifier of the real video; the
     actual extraction is delegated through a ``theplatform:`` URL result.
     """

     _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
     _TEST = {
         u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
         u'file': u'4JUVEwq3wUT7.flv',
         u'info_dict': {
             u'title': u'Connect Chat feat. Garth Brooks',
             u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
             u'duration': 1495,
         },
         u'params': {
             # rtmp download
             u'skip_download': True,
         },
     }

     def _real_extract(self, url):
         """Resolve a cbs.com page URL to a ``theplatform:<pid>`` URL result."""
         # The id taken from the URL is only used to label the page download.
         page_id = re.match(self._VALID_URL, url).group('id')
         html = self._download_webpage(url, page_id)

         # The platform id is assigned to ``video.settings.pid`` in the page.
         platform_id = self._search_regex(
             r"video\.settings\.pid\s*=\s*'([^']+)';",
             html, u'real video ID')
         target = u'theplatform:%s' % platform_id
         return self.url_result(target)
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -0,0 +1,267 @@
 # encoding: utf-8
 import re
 from .common import InfoExtractor
 from ..utils import ExtractorError
 class Channel9IE(InfoExtractor):
    '''
    Common extractor for channel9.msdn.com.
    The type of the provided URL (video or playlist) is determined from the
    Search.PageType meta tag of the web page rather than from the URL itself,
    as the URL alone does not always make the type clear.
    '''
    IE_DESC = u'Channel 9'
    IE_NAME = u'channel9'
    # The content path is matched lazily so that a trailing slash, a query
    # string or a fragment never ends up in the video id or in the RSS URL.
    _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)/?(?:[?#]|$)'
    _TESTS = [
        {
            u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
            u'file': u'Events_TechEd_Australia_2013_KOS002.mp4',
            u'md5': u'bbd75296ba47916b754e73c3a4bbdf10',
            u'info_dict': {
                u'title': u'Developer Kick-Off Session: Stuff We Love',
                u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f',
                u'duration': 4576,
                u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
                u'session_code': u'KOS002',
                u'session_day': u'Day 1',
                u'session_room': u'Arena 1A',
                u'session_speakers': [u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen'],
            },
        },
        {
            u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
            u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
            u'md5': u'b43ee4529d111bc37ba7ee4f34813e68',
            u'info_dict': {
                u'title': u'Self-service BI with Power BI - nuclear testing',
                u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                u'duration': 1540,
                u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
                u'authors': [u'Mike Wilmot'],
            },
        }
    ]
    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
    # Sorted by quality
    _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
    # Position in this list is the power of 1024 applied by _restore_bytes
    _SIZE_UNITS = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB']
    def _restore_bytes(self, formatted_size):
        '''Convert a formatted size such as "12.3 MB" into a number of bytes.

        Returns 0 if the size is empty, does not parse or uses an unknown unit.
        '''
        if not formatted_size:
            return 0
        m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
        if not m:
            return 0
        try:
            exponent = self._SIZE_UNITS.index(m.group('units').upper())
        except ValueError:
            return 0
        return int(float(m.group('size')) * (1024 ** exponent))
    def _formats_from_html(self, html):
        '''Return the format dicts for all known download links found in html,
        ordered like _known_formats (worst to best).'''
        # The (?x) flag has to be the very first thing in the pattern: flags
        # placed later are deprecated since Python 3.6 and an error since 3.11.
        FORMAT_REGEX = r'''(?x)
            <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
            <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
            (?:<div\s+class="popup\s+rounded">\s*
            <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
            </div>)?                                                # File size part may be missing
        '''
        # Extract known formats only; links with any other label are ignored
        formats = [{
            'url': x.group('url'),
            'format_id': x.group('quality'),
            'format_note': x.group('note'),
            'format': '%s (%s)' % (x.group('quality'), x.group('note')),
            'filesize': self._restore_bytes(x.group('filesize')),  # File size is approximate
        } for x in re.finditer(FORMAT_REGEX, html)
            if x.group('quality') in self._known_formats]
        # Sort according to known formats list
        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
        return formats
    def _extract_title(self, html):
        '''Return the title from the "title" meta tag, falling back to the
        OpenGraph title with the " (Channel 9)" suffix stripped.'''
        title = self._html_search_meta(u'title', html, u'title')
        if title is None:
            title = self._og_search_title(html)
            TITLE_SUFFIX = u' (Channel 9)'
            if title is not None and title.endswith(TITLE_SUFFIX):
                title = title[:-len(TITLE_SUFFIX)]
        return title
    def _extract_description(self, html):
        '''Return the raw content of the entry body, or the "description"
        meta tag if the page has no entry body.'''
        DESCRIPTION_REGEX = r'''(?sx)
            <div\s+class="entry-content">\s*
            <div\s+id="entry-body">\s*
            (?P<description>.+?)\s*
            </div>\s*
            </div>
        '''
        m = re.search(DESCRIPTION_REGEX, html)
        if m is not None:
            return m.group('description')
        return self._html_search_meta(u'description', html, u'description')
    def _extract_duration(self, html):
        '''Return the duration in seconds parsed from the HH:MM:SS value of
        data-video_duration, or None if the attribute is not found.'''
        m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
        if m is None:
            return None
        return int(m.group('hours')) * 60 * 60 + int(m.group('minutes')) * 60 + int(m.group('seconds'))
    def _extract_slides(self, html):
        '''Return the URL of the "Slides" link, or None.'''
        m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
        return m.group('slidesurl') if m is not None else None
    def _extract_zip(self, html):
        '''Return the URL of the "Zip" link, or None.'''
        m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
        return m.group('zipurl') if m is not None else None
    def _extract_avg_rating(self, html):
        '''Return the average rating as a float, or 0 if it is not on the page.'''
        m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
        return float(m.group('avgrating')) if m is not None else 0
    def _extract_rating_count(self, html):
        '''Return the number of ratings, or 0 if it is not on the page.'''
        m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
        return self._fix_count(m.group('ratingcount')) if m is not None else 0
    def _extract_view_count(self, html):
        '''Return the number of views, or 0 if it is not on the page.'''
        m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
        return self._fix_count(m.group('viewcount')) if m is not None else 0
    def _extract_comment_count(self, html):
        '''Return the number of comments, or 0 if it is not on the page.'''
        m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
        return self._fix_count(m.group('commentcount')) if m is not None else 0
    def _fix_count(self, count):
        '''Parse a count such as "1,234" into an int; None is passed through.'''
        return int(str(count).replace(',', '')) if count is not None else None
    def _extract_authors(self, html):
        '''Return the names linked inside the "author" list item, or None if
        the page has no such item.'''
        m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
        if m is None:
            return None
        return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))
    def _extract_session_code(self, html):
        '''Return the text of the "code" list item, or None.'''
        m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
        return m.group('code') if m is not None else None
    def _extract_session_day(self, html):
        '''Return the link text of the "day" list item, or None.'''
        m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
        return m.group('day') if m is not None else None
    def _extract_session_room(self, html):
        '''Return the text of the "room" list item, or None.'''
        m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
        return m.group('room') if m is not None else None
    def _extract_session_speakers(self, html):
        '''Return the names of all speaker links on the page (may be empty).'''
        return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
    def _extract_content(self, html, content_path):
        '''Build one info dict per downloadable item (slides, zip, recording).

        Returns None, after emitting a warning, if the page offers none of them.
        '''
        # Look for downloadable content
        formats = self._formats_from_html(html)
        slides = self._extract_slides(html)
        zip_ = self._extract_zip(html)
        # Nothing to download
        if not formats and slides is None and zip_ is None:
            self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path)
            return
        # Extract meta shared by every item of this page
        title = self._extract_title(html)
        common = {
            '_type': 'video',
            'id': content_path,
            'description': self._extract_description(html),
            'thumbnail': self._og_search_thumbnail(html),
            'duration': self._extract_duration(html),
            'avg_rating': self._extract_avg_rating(html),
            'rating_count': self._extract_rating_count(html),
            'view_count': self._extract_view_count(html),
            'comment_count': self._extract_comment_count(html),
        }
        result = []
        if slides is not None:
            d = common.copy()
            d.update({'title': title + '-Slides', 'url': slides})
            result.append(d)
        if zip_ is not None:
            d = common.copy()
            d.update({'title': title + '-Zip', 'url': zip_})
            result.append(d)
        if formats:
            d = common.copy()
            d.update({'title': title, 'formats': formats})
            result.append(d)
        return result
    def _extract_entry_item(self, html, content_path):
        '''Extract the content of an 'Entry.Item' page and attach its authors.'''
        contents = self._extract_content(html, content_path)
        if contents is None:
            return contents
        authors = self._extract_authors(html)
        for content in contents:
            content['authors'] = authors
        return contents
    def _extract_session(self, html, content_path):
        '''Extract the content of a 'Session' page and attach the session meta
        (code, day, room, speakers).'''
        contents = self._extract_content(html, content_path)
        if contents is None:
            return contents
        session_meta = {
            'session_code': self._extract_session_code(html),
            'session_day': self._extract_session_day(html),
            'session_room': self._extract_session_room(html),
            'session_speakers': self._extract_session_speakers(html),
        }
        for content in contents:
            content.update(session_meta)
        return contents
    def _extract_list(self, content_path):
        '''Return a playlist built from the item links of the RSS feed that
        belongs to content_path.'''
        rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
        entries = [self.url_result(session_url.text, 'Channel9')
                   for session_url in rss.findall('./channel/item/link')]
        title_text = rss.find('./channel/title').text
        return self.playlist_result(entries, content_path, title_text)
    def _real_extract(self, url):
        '''Download the page and dispatch on its Search.PageType meta tag.

        Raises ExtractorError if the tag is missing or has an unexpected value.
        '''
        mobj = re.match(self._VALID_URL, url)
        content_path = mobj.group('contentpath')
        webpage = self._download_webpage(url, content_path, u'Downloading web page')
        page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
        if page_type_m is None:
            raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True)
        page_type = page_type_m.group('pagetype')
        if page_type == 'List':         # List page, may contain list of 'item'-like objects
            return self._extract_list(content_path)
        elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content
            return self._extract_entry_item(webpage, content_path)
        elif page_type == 'Session':    # Event session page, may contain downloadable content
            return self._extract_session(webpage, content_path)
        else:
            raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -34,15 +34,39 @@ class InfoExtractor(object):
    The dictionaries must include the following fields:
    id:             Video identifier.
    url:            Final video URL.
    title:          Video title, unescaped.
    ext:            Video filename extension.
-    Instead of url and ext, formats can also specified.
+    Additionally, it must contain either a formats entry or url and ext:
    formats:        A list of dictionaries for each format available, it must
                    be ordered from worst to best quality. Potential fields:
                    * url        Mandatory. The URL of the video file
                    * ext        Will be calculated from url if missing
                    * format     A human-readable description of the format
                                 ("mp4 container with h264/opus").
                                 Calculated from the format_id, width, height,
                                 and format_note fields if missing.
                    * format_id  A short description of the format
                                 ("mp4_h264_opus" or "19")
                    * format_note Additional info about the format
                                 ("3D" or "DASH video")
                    * width      Width of the video, if known
                    * height     Height of the video, if known
                    * abr        Average audio bitrate in KBit/s
                    * acodec     Name of the audio codec in use
                    * vbr        Average video bitrate in KBit/s
                    * vcodec     Name of the video codec in use
                    * filesize   The number of bytes, if known in advance
                    * player_url SWF Player URL (used for rtmpdump).
    url:            Final video URL.
    ext:            Video filename extension.
    format:         The video format, defaults to ext (used for --get-format)
    player_url:     SWF Player URL (used for rtmpdump).
    urlhandle:      [internal] The urlHandle to be used to download the file,
                    like returned by urllib.request.urlopen
    The following fields are optional:
    format:         The video format, defaults to ext (used for --get-format)
    thumbnails:     A list of dictionaries (with the entries "resolution" and
                    "url") for the varying thumbnails
    thumbnail:      Full URL to a video thumbnail image.
@@ -51,35 +75,14 @@ class InfoExtractor(object):
    upload_date:    Video upload date (YYYYMMDD).
    uploader_id:    Nickname or id of the video uploader.
    location:       Physical location of the video.
    player_url:     SWF Player URL (used for rtmpdump).
    subtitles:      The subtitle file contents as a dictionary in the format
                    {language: subtitles}.
    duration:       Length of the video in seconds, as an integer.
    view_count:     How many users have watched the video on the platform.
    like_count:     Number of positive ratings of the video
    dislike_count:  Number of negative ratings of the video
    comment_count:  Number of comments on the video
    urlhandle:      [internal] The urlHandle to be used to download the file,
                    like returned by urllib.request.urlopen
    age_limit:      Age restriction for the video, as an integer (years)
    formats:        A list of dictionaries for each format available, it must
                    be ordered from worst to best quality. Potential fields:
                    * url       Mandatory. The URL of the video file
                    * ext       Will be calculated from url if missing
                    * format    A human-readable description of the format
                                ("mp4 container with h264/opus").
                                Calculated from the format_id, width, height.
                                and format_note fields if missing.
                    * format_id A short description of the format
                                ("mp4_h264_opus" or "19")
                    * format_note Additional info about the format
                                ("3D" or "DASH video")
                    * width     Width of the video, if known
                    * height    Height of the video, if known
                    * abr       Average audio bitrate in KBit/s
                    * acodec    Name of the audio codec in use
                    * vbr       Average video bitrate in KBit/s
                    * vcodec    Name of the video codec in use
                    * filesize  The number of bytes, if known in advance
    webpage_url:    The url to the video webpage, if given to youtube-dl it
                    should allow to get the same result again. (It will be set
                    by YoutubeDL if it's missing)
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -28,7 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
 class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information Extractor for Dailymotion"""
-    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
+    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
    IE_NAME = u'dailymotion'
    _FORMATS = [
@@ -81,7 +81,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1).split('_')[0].split('?')[0]
+        video_id = mobj.group('id')
        url = 'http://www.dailymotion.com/video/%s' % video_id
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -9,7 +9,7 @@ from ..utils import (
 class DaumIE(InfoExtractor):
-    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'
    _TEST = {
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -17,7 +17,7 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'
@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
        u'file': u'120708114770723.mp4',
        u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
        u'info_dict': {
-            u"duration": 279, 
+            u"duration": 279,
            u"title": u"PEOPLE ARE AWESOME 2013"
        }
    }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -222,6 +222,18 @@ class GenericIE(InfoExtractor):
                'id': video_id,
            }
        # Look for embedded blip.tv player
        mobj = re.search(r'<meta\s[^>]*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage)
        if mobj:
            return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV')
        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage)
        if mobj:
            player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1)
            player_page = self._download_webpage(player_url, mobj.group(1))
            blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False)
            if blip_video_id:
                return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV')
        # Look for Bandcamp pages with custom domain
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
        if mobj is not None:
@@ -229,6 +241,12 @@ class GenericIE(InfoExtractor):
            # Don't set the extractor because it can be a track url or an album
            return self.url_result(burl)
        # Look for embedded Vevo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -44,7 +44,7 @@ class IGNIE(InfoExtractor):
                {
                    u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
                    u'info_dict': {
-                        u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion',
+                        u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
                        u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
                    },
                },
--- a/youtube_dl/extractor/mdr.py
+++ b/youtube_dl/extractor/mdr.py
@@ -0,0 +1,78 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
 )
 class MDRIE(InfoExtractor):
    """Extractor for video and audio pages of the mdr.de Mediathek."""
    _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
    _TESTS = [{
        u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
        u'file': u'165624.mp4',
        u'md5': u'ae785f36ecbf2f19b42edf1bc9c85815',
        u'info_dict': {
            u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
        },
    },
    {
        u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
        u'file': u'718370.mp3',
        u'md5': u'a9d21345a234c7b45dee612f290fd8d7',
        u'info_dict': {
            u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
        },
    }]
    def _real_extract(self, url):
        """Return the info dict for url, with one format per asset that
        offers a progressive download URL.

        Raises ExtractorError if no such asset is found.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('video_id')
        domain = m.group('domain')
        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
        xmlurl = self._search_regex(
            r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom\.xml)', html, u'XML URL')
        doc = self._download_xml(domain + xmlurl, video_id)
        formats = []
        for a in doc.findall('./assets/asset'):
            url_el = a.find('.//progressiveDownloadUrl')
            if url_el is None:
                # Asset cannot be fetched as a plain file, skip it
                continue
            # Bitrates are scaled down by 1000 (presumably bit/s -> kbit/s)
            abr = int(a.find('bitrateAudio').text) // 1000
            media_type = a.find('mediaType').text
            fmt = {
                'abr': abr,
                'filesize': int(a.find('fileSize').text),
                'url': url_el.text,
            }
            vbr_el = a.find('bitrateVideo')
            if vbr_el is None:
                # No video bitrate element: treat the asset as audio-only
                fmt.update({
                    'vcodec': 'none',
                    'format_id': u'%s-%d' % (media_type, abr),
                })
            else:
                vbr = int(vbr_el.text) // 1000
                fmt.update({
                    'vbr': vbr,
                    'width': int(a.find('frameWidth').text),
                    'height': int(a.find('frameHeight').text),
                    'format_id': u'%s-%d' % (media_type, vbr),
                })
            formats.append(fmt)
        if not formats:
            raise ExtractorError(u'Could not find any valid formats')
        # Audio-only formats carry no 'vbr'; -1 ranks them below every video
        # format without comparing None to an int (a TypeError on Python 3).
        formats.sort(key=lambda f: (f.get('vbr', -1), f['abr']))
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -9,7 +9,7 @@ from ..utils import (
 class NaverIE(InfoExtractor):
-    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://tvcast.naver.com/v/81652',
--- a/youtube_dl/extractor/ndtv.py
+++ b/youtube_dl/extractor/ndtv.py
@@ -1,6 +1,4 @@
 import json
 import re
 import time
 from .common import InfoExtractor
 from ..utils import month_by_name
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -0,0 +1,38 @@
 import re
 from .common import InfoExtractor
 from ..utils import compat_urllib_parse
 class PornHdIE(InfoExtractor):
    """Extractor for pornhd.com video pages."""
    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
    _TEST = {
        u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
        u'file': u'1962.flv',
        u'md5': u'35272469887dca97abd30abecc6cdf75',
        u'info_dict': {
            u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
            u"age_limit": 18,
        }
    }
    def _real_extract(self, url):
        """Return the info dict for url; id and title both come from the URL."""
        match = re.match(self._VALID_URL, url)
        video_id, video_title = match.group('video_id', 'video_title')
        page = self._download_webpage(url, video_id)
        # The media URL sits percent-encoded in an "&hd=...&" parameter
        encoded_url = self._html_search_regex(
            r'&hd=(http.+?)&', page, u'video URL')
        return {
            'id': video_id,
            'url': compat_urllib_parse.unquote(encoded_url),
            'ext': 'flv',
            'title': video_title,
            'age_limit': 18,
        }
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -12,7 +12,7 @@ from ..aes import (
 )
 class PornHubIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))'
    _TEST = {
        u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
        u'file': u'648719015.mp4',
--- a/youtube_dl/extractor/radiofrance.py
+++ b/youtube_dl/extractor/radiofrance.py
@@ -0,0 +1,60 @@
 # coding: utf-8
 import datetime
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    remove_start,
 )
 class RadioFranceIE(InfoExtractor):
    """Extractor for the "radiovisions" pages on maison.radiofrance.fr."""
    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = u'radiofrance'
    _TEST = {
        u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
        u'file': u'one-one.mp4',
        u'md5': u'todo',
        u'info_dict': {
            u"title": u"One to one",
            u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            u"uploader": u"ferdi",
        },
    }
    def _real_extract(self, url):
        """Return the info dict for url, with one audio-only format per
        "name: 'url'" pair found in the player's data-source attribute."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title')
        # Description and uploader are optional (fatal=False)
        description = self._html_search_regex(
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
            webpage, u'description', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
            webpage, u'uploader', fatal=False)
        formats_str = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
            webpage, u'audio URLs')
        # No sorting, we don't know any more about these formats
        formats = []
        for format_id, format_url in re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str):
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'vcodec': 'none',
            })
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
        }
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -3,6 +3,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    xpath_with_ns,
 )
@@ -32,6 +33,17 @@ class ThePlatformIE(InfoExtractor):
        smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
            'format=smil&mbr=true'.format(video_id))
        meta = self._download_xml(smil_url, video_id)
        try:
            error_msg = next(
                n.attrib['abstract']
                for n in meta.findall(_x('.//smil:ref'))
                if n.attrib.get('title') == u'Geographic Restriction')
        except StopIteration:
            pass
        else:
            raise ExtractorError(error_msg, expected=True)
        info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
        info_json = self._download_webpage(info_url, video_id)
        info = json.loads(info_json)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -15,7 +15,11 @@ class VevoIE(InfoExtractor):
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE)
    """
-    _VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
+    _VALID_URL = r'''(?x)
        (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
           https?://cache\.vevo\.com/m/html/embed\.html\?video=|
           vevo:)
        (?P<id>[^&?#]+)'''
    _TESTS = [{
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',
--- a/youtube_dl/extractor/videopremium.py
+++ b/youtube_dl/extractor/videopremium.py
@@ -15,6 +15,7 @@ class VideoPremiumIE(InfoExtractor):
        u'params': {
            u'skip_download': True,
        },
        u'skip': u'Test file has been deleted.',
    }
    def _real_extract(self, url):
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1377,9 +1377,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
-            video_duration = ''
+            video_duration = None
        else:
-            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
+            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
        # annotations
        video_annotations = None
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import ctypes
 import datetime
 import email.utils
 import errno
@@ -1051,7 +1052,7 @@ def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """
    ENGLISH_NAMES = [
-        u'Januar', u'February', u'March', u'April', u'May', u'June',
+        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    try:
        return ENGLISH_NAMES.index(name) + 1
@@ -1062,3 +1063,24 @@ def month_by_name(name):
 def fix_xml_all_ampersand(xml_str):
    """Return xml_str with every '&' turned into '&amp;'.

    Ampersands that already start an entity are escaped as well.
    """
    pieces = xml_str.split(u'&')
    return u'&amp;'.join(pieces)
 def setproctitle(title):
    """Set the name of the current process to title, on a best-effort basis.

    title must be a text (unicode) string. The call silently does nothing
    when libc.so.6 cannot be loaded or does not provide prctl.
    """
    assert isinstance(title, type(u''))
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    # Size the buffer from the encoded bytes, not from the character count:
    # a non-ASCII title is longer in UTF-8 than len(title). Passing bytes
    # makes ctypes allocate len + 1 so the buffer stays NUL-terminated.
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME in <linux/prctl.h>
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
 def remove_start(s, start):
    """Return s without the leading start, or s unchanged if it does not
    begin with start."""
    return s[len(start):] if s.startswith(start) else s
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.12.11.1'
+__version__ = '2013.12.16.6'
Author	SHA1	Message	Date
Philipp Hagemeister	83c632dc43	release 2013.12.16.6	2013-12-16 21:46:16 +01:00
Philipp Hagemeister	ff07a05575	Merge branch 'master' of github.com:rg3/youtube-dl	2013-12-16 21:46:11 +01:00
Philipp Hagemeister	f25571ffbf	Add support for embedded vevo player (Fixes #1957 )	2013-12-16 21:45:21 +01:00
Jaime Marquínez Ferrándiz	f7a6892572	[arte:ddc] Remove test video seems to expire in 7 days, as arte+7	2013-12-16 21:42:41 +01:00
Philipp Hagemeister	8fe56478f8	release 2013.12.16.5	2013-12-16 21:34:47 +01:00
Philipp Hagemeister	0e2a436dce	[radiofrance] Add support (Fixes #1942 )	2013-12-16 21:34:41 +01:00
Philipp Hagemeister	24050dd11c	release 2013.12.16.4	2013-12-16 21:10:18 +01:00
Philipp Hagemeister	8c8e3eec79	[facebook] Recognize #! URLs (Fixes #1988 )	2013-12-16 21:10:06 +01:00
Philipp Hagemeister	7ebc9dee69	Merge pull request #1987 from rzhxeo/blip [GenericIE] Add support for embedded blip.tv	2013-12-16 11:28:34 -08:00
rzhxeo	ee3e63e477	[GenericIE] Add support for embedded blip.tv	2013-12-16 20:08:23 +01:00
Philipp Hagemeister	e9c424c144	Merge pull request #1984 from alimirjamali/patch-1 Incorrect variable is used to check whether thumbnail exists	2013-12-16 09:04:36 -08:00
alimirjamali	0a9ce268ba	Incorrect variable is used to check whether thumbnail exists Dear @phihag I believe in line 848, the correct variable to check is 'thumb_filename' rather than 'infofn' Kindly advise Mit freundlichen Gruessen Ali	2013-12-16 20:14:28 +03:30
Philipp Hagemeister	4b2da48ea7	release 2013.12.16.3	2013-12-16 14:44:29 +01:00
Philipp Hagemeister	e64eaaa97d	Fix execution under Python 3	2013-12-16 14:44:17 +01:00
Philipp Hagemeister	780603027f	[videopremium] Skip test	2013-12-16 14:42:07 +01:00
Philipp Hagemeister	00902cd601	release 2013.12.16.2	2013-12-16 14:13:51 +01:00
Philipp Hagemeister	d67b0b1596	Reorder info_dict documentation	2013-12-16 14:13:40 +01:00
Philipp Hagemeister	d7dda16888	[blinkx] Add extractor (Fixes #1972 )	2013-12-16 13:56:30 +01:00
Philipp Hagemeister	a19fd00cc4	Simplify --playlist-start / --playlist-end interface	2013-12-16 13:16:20 +01:00
Philipp Hagemeister	d66152a898	[ndtv] Remove unused imports	2013-12-16 08:16:38 +01:00
Philipp Hagemeister	8c5f0c9fbc	[mdr] Clean up	2013-12-16 08:16:11 +01:00
Philipp Hagemeister	6888a874a1	release 2013.12.16.1	2013-12-16 05:45:15 +01:00
Philipp Hagemeister	09dacfa57f	[mdr] Simplify	2013-12-16 05:44:34 +01:00
Philipp Hagemeister	b2ae513586	Merge remote-tracking branch 'mc2avr/master'	2013-12-16 05:14:03 +01:00
Philipp Hagemeister	e4a0489f6e	Merge remote-tracking branch 'dstftw/channel9' Conflicts: youtube_dl/extractor/__init__.py	2013-12-16 05:14:00 +01:00
Philipp Hagemeister	b83be81d27	Credit @mjorlitzky for pornhd (#1961 )	2013-12-16 05:11:19 +01:00
Philipp Hagemeister	6f5dcd4eee	[pornhd] Simplify	2013-12-16 05:10:42 +01:00
Philipp Hagemeister	1bb2fc98e0	Merge remote-tracking branch 'mjorlitzky/master'	2013-12-16 05:07:58 +01:00
Philipp Hagemeister	e3946f989e	Set process title to youtube-dl This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.	2013-12-16 05:04:55 +01:00
Philipp Hagemeister	8863d0de91	release 2013.12.16	2013-12-16 04:45:32 +01:00
Philipp Hagemeister	7b6fefc9d4	Apply --no-overwrites for --write-* files as well (Fixes #1980 )	2013-12-16 04:39:13 +01:00
Philipp Hagemeister	525ef9227f	Add --get-duration (Fixes #859 )	2013-12-16 04:15:10 +01:00
Philipp Hagemeister	c0ba0f4859	Document duration field	2013-12-16 04:09:43 +01:00
Philipp Hagemeister	b466b7029d	[youtube] Make duration an integer or None	2013-12-16 04:09:05 +01:00
Philipp Hagemeister	fa3ae234e0	[cbs] Add extractor (Fixes #1977 )	2013-12-16 03:53:43 +01:00
Philipp Hagemeister	48462108f3	[theplatform] Fix geographic restriction check	2013-12-16 03:43:45 +01:00
Philipp Hagemeister	f8b56e95b8	[theplatform] Detect geoblocked content	2013-12-16 03:34:46 +01:00
Philipp Hagemeister	5fe18bdbde	Add --min-views / --max-views (Fixes #1979 )	2013-12-16 03:09:49 +01:00
Jaime Marquínez Ferrándiz	dca02c80bc	Fix detection of the extension if the 'extractaudio' is given and improve the error message (#1969 ) Using 'foo.mp4' shouldn't raise an error. If 'foo' is given suggest using 'foo.%(ext)s' for the template	2013-12-15 11:42:38 +01:00
Jaime Marquínez Ferrándiz	9ee859b683	[daylimotion] Add support for urls from the mobile site (fixes #1953 ) It uses the 'touch' subdomain and adds a '#' before 'video'	2013-12-14 14:20:12 +01:00
Michael Orlitzky	8e05c870b4	Add support for pornhd.com.	2013-12-13 22:24:32 -05:00
Jaime Marquínez Ferrándiz	5d574e143f	[ign] Update one of test video's title	2013-12-13 17:04:40 +01:00
Philipp Hagemeister	2a203a6cda	Merge pull request #1956 from dstftw/master Fix typo in month name	2013-12-13 07:41:34 -08:00
dst	dadb8184e4	Fix typo in month name	2013-12-13 22:27:37 +07:00
Jaime Marquínez Ferrándiz	7a563df90a	[daum] Recognize mobile urls (#1952 )	2013-12-12 13:05:38 +01:00
Jaime Marquínez Ferrándiz	24b173fa5c	[naver] Recognize mobile urls (fixes #1951 )	2013-12-12 13:04:02 +01:00
dst	9b17ba0fa5	[channel9] Fix test description md5	2013-12-12 16:10:17 +07:00
dst	211f555d4c	[channel9] Missing import in __init__	2013-12-12 15:55:31 +07:00
dst	4d2ebb6bd7	[channel9] Cleanup	2013-12-12 15:19:23 +07:00
dst	df53747436	[channel9] Initial implementation (#1885 )	2013-12-12 15:13:45 +07:00
Philipp Hagemeister	f2c36ee43e	release 2013.12.11.2	2013-12-11 09:22:25 +01:00
Philipp Hagemeister	00381b4ccb	[pornhub] Fix URL regexp	2013-12-11 09:22:08 +01:00
mc2avr	df1d7da2af	add MDRIE	2013-12-10 18:40:50 +01:00
`@@ -1,2 +1,2 @@`

	`__version__ = '2013.12.11.1'`	`__version__ = '2013.12.16.6'`