release 2016.01.31

[youtube] Filter duplicates in playlists base extractor
[youtube] Use authentication for entry list base extractor (Closes #8380)
2026-03-30 19:02:20 +00:00 · 2016-01-31 12:57:18 +01:00 · 2016-01-31 17:52:02 +06:00 · 2016-01-31 17:49:59 +06:00 · 2016-01-31 07:15:43 +08:00 · 2016-01-31 04:41:18 +06:00
39 changed files with 477 additions and 126 deletions
--- a/1
+++ b/1
@@ -155,3 +155,4 @@ Vignesh Venkat
 Tom Gijselinck
 Founder Fang
 Andrew Alexeyew
+Saso Bezlaj
--- a/README.md
+++ b/README.md
@@ -173,6 +173,10 @@ which means you can modify it, redistribute it or use it however you like.
                                     expected filesize (experimental)
    --hls-prefer-native              Use the native HLS downloader instead of
                                     ffmpeg (experimental)
+    --hls-use-mpegts                 Use the mpegts container for HLS videos,
+                                     allowing to play the video while
+                                     downloading (some players may not be able
+                                     to play it)
    --external-downloader COMMAND    Use the specified external downloader.
                                     Currently supports
                                     aria2c,axel,curl,httpie,wget
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -55,6 +55,7 @@
 - **audiomack**
 - **audiomack:album**
 - **Azubu**
+ - **AzubuLive**
 - **BaiduVideo**: 百度视频
 - **bambuser**
 - **bambuser:channel**
@@ -315,6 +316,7 @@
 - **mailru**: Видео@Mail.Ru
 - **MakerTV**
 - **Malemotion**
+ - **MatchTV**
 - **MDR**: MDR.DE and KiKA
 - **media.ccc.de**
 - **metacafe**
@@ -507,6 +509,7 @@
 - **Sapo**: SAPO Vídeos
 - **savefrom.net**
 - **SBS**: sbs.com.au
+ - **schooltv**
 - **SciVee**
 - **screen.yahoo:search**: Yahoo screen search
 - **Screencast**
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -221,6 +221,16 @@ class TestFormatSelection(unittest.TestCase):
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'dash-video-low')

+        formats = [
+            {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
+        ]
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
+
    def test_youtube_format_selection(self):
        order = [
            '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -56,7 +56,7 @@ class TestAllURLsMatching(unittest.TestCase):
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')

    def test_youtube_user_matching(self):
-        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+        self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])

    def test_youtube_feeds(self):
        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -263,7 +263,7 @@ class YoutubeDL(object):
    the downloader (see youtube_dl/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
-    xattr_set_filesize, external_downloader_args.
+    xattr_set_filesize, external_downloader_args, hls_use_mpegts.

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
@@ -906,7 +906,7 @@ class YoutubeDL(object):
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>ext|acodec|vcodec|container|protocol)
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
-                \s*(?P<value>[a-zA-Z0-9_-]+)
+                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -369,6 +369,7 @@ def _real_main(argv=None):
        'no_color': opts.no_color,
        'ffmpeg_location': opts.ffmpeg_location,
        'hls_prefer_native': opts.hls_prefer_native,
+        'hls_use_mpegts': opts.hls_use_mpegts,
        'external_downloader_args': external_downloader_args,
        'postprocessor_args': postprocessor_args,
        'cn_verification_proxy': opts.cn_verification_proxy,
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -45,6 +45,7 @@ class FileDownloader(object):
                        (experimental)
    external_downloader_args:  A list of additional command-line arguments for the
                        external downloader.
+    hls_use_mpegts:     Use the mpegts container for HLS videos.

    Subclasses of this one must re-define the real_download method.
    """
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -273,15 +273,21 @@ class F4mFD(FragmentFD):
        return fragments_list

    def _parse_bootstrap_node(self, node, base_url):
-        if node.text is None:
+        # Sometimes non empty inline bootstrap info can be specified along
+        # with bootstrap url attribute (e.g. dummy inline bootstrap info
+        # contains whitespace characters in [1]). We will prefer bootstrap
+        # url over inline bootstrap info when present.
+        # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
+        bootstrap_url = node.get('url')
+        if bootstrap_url:
            bootstrap_url = compat_urlparse.urljoin(
-                base_url, node.attrib['url'])
+                base_url, bootstrap_url)
            boot_info = self._get_bootstrap_from_url(bootstrap_url)
        else:
            bootstrap_url = None
            bootstrap = base64.b64decode(node.text.encode('ascii'))
            boot_info = read_bootstrap_info(bootstrap)
-        return (boot_info, bootstrap_url)
+        return boot_info, bootstrap_url

    def real_download(self, filename, info_dict):
        man_url = info_dict['url']
@@ -316,7 +322,8 @@ class F4mFD(FragmentFD):
            metadata = None

        fragments_list = build_fragments_list(boot_info)
-        if self.params.get('test', False):
+        test = self.params.get('test', False)
+        if test:
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)
@@ -326,6 +333,7 @@ class F4mFD(FragmentFD):
        ctx = {
            'filename': filename,
            'total_frags': total_frags,
+            'live': live,
        }

        self._prepare_frag_download(ctx)
@@ -380,7 +388,7 @@ class F4mFD(FragmentFD):
                else:
                    raise

-            if not fragments_list and live and bootstrap_url:
+            if not fragments_list and not test and live and bootstrap_url:
                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
                total_frags += len(fragments_list)
                if fragments_list and (fragments_list[0][1] > frag_i + 1):
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -26,7 +26,11 @@ class FragmentFD(FileDownloader):
        self._start_frag_download(ctx)

    def _prepare_frag_download(self, ctx):
-        self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
+        if 'live' not in ctx:
+            ctx['live'] = False
+        self.to_screen(
+            '[%s] Total fragments: %s'
+            % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
        self.report_destination(ctx['filename'])
        dl = HttpQuietDownloader(
            self.ydl,
@@ -74,14 +78,14 @@ class FragmentFD(FileDownloader):
            if s['status'] not in ('downloading', 'finished'):
                return

-            frag_total_bytes = s.get('total_bytes') or 0
-
-            estimated_size = (
-                (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
-                (state['frag_index'] + 1) * total_frags)
            time_now = time.time()
-            state['total_bytes_estimate'] = estimated_size
            state['elapsed'] = time_now - start
+            frag_total_bytes = s.get('total_bytes') or 0
+            if not ctx['live']:
+                estimated_size = (
+                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
+                    (state['frag_index'] + 1) * total_frags)
+                state['total_bytes_estimate'] = estimated_size

            if s['status'] == 'finished':
                state['frag_index'] += 1
@@ -91,9 +95,10 @@ class FragmentFD(FileDownloader):
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
-                state['eta'] = self.calc_eta(
-                    start, time_now, estimated_size,
-                    state['downloaded_bytes'])
+                if not ctx['live']:
+                    state['eta'] = self.calc_eta(
+                        start, time_now, estimated_size,
+                        state['downloaded_bytes'])
                state['speed'] = s.get('speed')
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -39,7 +39,11 @@ class HlsFD(FileDownloader):
                '-headers',
                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]

-        args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
+        args += ['-i', url, '-c', 'copy']
+        if self.params.get('hls_use_mpegts', False):
+            args += ['-f', 'mpegts']
+        else:
+            args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']

        args = [encodeArgument(opt) for opt in args]
        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -50,7 +50,7 @@ from .atresplayer import AtresPlayerIE
 from .atttechchannel import ATTTechChannelIE
 from .audimedia import AudiMediaIE
 from .audiomack import AudiomackIE, AudiomackAlbumIE
-from .azubu import AzubuIE
+from .azubu import AzubuIE, AzubuLiveIE
 from .baidu import BaiduVideoIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
@@ -372,6 +372,7 @@ from .macgamestore import MacGameStoreIE
 from .mailru import MailRuIE
 from .makertv import MakerTVIE
 from .malemotion import MalemotionIE
+from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
@@ -482,6 +483,7 @@ from .npo import (
    NPOLiveIE,
    NPORadioIE,
    NPORadioFragmentIE,
+    SchoolTVIE,
    VPROIE,
    WNLIE
 )
--- a/youtube_dl/extractor/azubu.py
+++ b/youtube_dl/extractor/azubu.py
@@ -3,7 +3,11 @@ from __future__ import unicode_literals
 import json

 from .common import InfoExtractor
-from ..utils import float_or_none
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    sanitized_Request,
+)


 class AzubuIE(InfoExtractor):
@@ -91,3 +95,37 @@ class AzubuIE(InfoExtractor):
            'view_count': view_count,
            'formats': formats,
        }
+
+
+class AzubuLiveIE(InfoExtractor):
+    _VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$'
+
+    _TEST = {
+        'url': 'http://www.azubu.tv/MarsTVMDLen',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        user = self._match_id(url)
+
+        info = self._download_json(
+            'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
+            user)['data']
+        if info['type'] != 'STREAM':
+            raise ExtractorError('{0} is not streaming live'.format(user), expected=True)
+
+        req = sanitized_Request(
+            'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
+        req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
+        bc_info = self._download_json(req, user)
+        m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
+        formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
+
+        return {
+            'id': info['id'],
+            'title': self._live_title(info['title']),
+            'uploader_id': user,
+            'formats': formats,
+            'is_live': True,
+            'thumbnail': bc_info['poster'],
+        }
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -193,6 +193,19 @@ class BBCCoUkIE(InfoExtractor):
                # rtmp download
                'skip_download': True,
            },
+        }, {
+            # compact player (https://github.com/rg3/youtube-dl/issues/8147)
+            'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
+            'info_dict': {
+                'id': 'p028bfkj',
+                'ext': 'flv',
+                'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
+                'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
            'only_matching': True,
@@ -482,9 +495,11 @@ class BBCCoUkIE(InfoExtractor):
        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
            title = self._og_search_title(webpage, default=None) or self._html_search_regex(
-                r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', webpage, 'title')
+                (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
+                 r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
            description = self._search_regex(
-                r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
+                (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
+                 r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
                webpage, 'description', default=None)
            if not description:
                description = self._html_search_meta('description', webpage)
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -4,11 +4,10 @@ from __future__ import unicode_literals
 import re
 import json

-from .common import InfoExtractor
-from ..utils import remove_start
+from .theplatform import ThePlatformIE


-class CBSNewsIE(InfoExtractor):
+class CBSNewsIE(ThePlatformIE):
    IE_DESC = 'CBS News'
    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'

@@ -31,7 +30,7 @@ class CBSNewsIE(InfoExtractor):
            'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
            'info_dict': {
                'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 205,
@@ -42,7 +41,7 @@ class CBSNewsIE(InfoExtractor):
                },
            },
            'params': {
-                # rtmp download
+                # m3u8 download
                'skip_download': True,
            },
        },
@@ -63,33 +62,6 @@ class CBSNewsIE(InfoExtractor):
        duration = item.get('duration')
        thumbnail = item.get('mediaImage') or item.get('thumbnail')

-        formats = []
-        for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
-            uri = item.get('media' + format_id + 'URI')
-            if not uri:
-                continue
-            uri = remove_start(uri, '{manifest:none}')
-            fmt = {
-                'url': uri,
-                'format_id': format_id,
-            }
-            if uri.startswith('rtmp'):
-                play_path = re.sub(
-                    r'{slistFilePath}', '',
-                    uri.split('<break>')[-1].split('{break}')[-1])
-                play_path = re.sub(
-                    r'{manifest:.+}.*$', '', play_path)
-                fmt.update({
-                    'app': 'ondemand?auth=cbs',
-                    'play_path': 'mp4:' + play_path,
-                    'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
-                    'page_url': 'http://www.cbsnews.com',
-                    'ext': 'flv',
-                })
-            elif uri.endswith('.m3u8'):
-                fmt['ext'] = 'mp4'
-            formats.append(fmt)
-
        subtitles = {}
        if 'mpxRefId' in video_info:
            subtitles['en'] = [{
@@ -97,6 +69,17 @@ class CBSNewsIE(InfoExtractor):
                'url': 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'],
            }]

+        formats = []
+        for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
+            pid = item.get('media' + format_id)
+            if not pid:
+                continue
+            release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
+            tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
+            formats.extend(tp_formats)
+            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        self._sort_formats(formats)
+
        return {
            'id': video_id,
            'title': title,
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -825,6 +825,12 @@ class InfoExtractor(object):
        if not formats:
            raise ExtractorError('No video formats found')

+        for f in formats:
+            # Automatically determine tbr when missing based on abr and vbr (improves
+            # formats sorting in some cases)
+            if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
+                f['tbr'] = f['abr'] + f['vbr']
+
        def _formats_key(f):
            # TODO remove the following workaround
            from ..utils import determine_ext
@@ -1014,6 +1020,18 @@ class InfoExtractor(object):
            return []
        m3u8_doc, urlh = res
        m3u8_url = urlh.geturl()
+        # A Media Playlist Tag MUST NOT appear in a Master Playlist
+        # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
+        # The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists
+        # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
+        if '#EXT-X-TARGETDURATION' in m3u8_doc:
+            return [{
+                'url': m3u8_url,
+                'format_id': m3u8_id,
+                'ext': ext,
+                'protocol': entry_protocol,
+                'preference': preference,
+            }]
        last_info = None
        last_media = None
        kv_rex = re.compile(
@@ -1164,6 +1182,7 @@ class InfoExtractor(object):
        formats = []
        rtmp_count = 0
        http_count = 0
+        m3u8_count = 0

        videos = smil.findall(self._xpath_ns('.//video', namespace))
        for video in videos:
@@ -1203,8 +1222,17 @@ class InfoExtractor(object):
            src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)

            if proto == 'm3u8' or src_ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False))
+                m3u8_formats = self._extract_m3u8_formats(
+                    src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
+                if len(m3u8_formats) == 1:
+                    m3u8_count += 1
+                    m3u8_formats[0].update({
+                        'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
+                        'tbr': bitrate,
+                        'width': width,
+                        'height': height,
+                    })
+                formats.extend(m3u8_formats)
                continue

            if src_ext == 'f4m':
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -113,7 +113,7 @@ class CSpanIE(InfoExtractor):
                    'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
                })
            if not formats:
-                path = get_text_attr(f, 'path')
+                path = unescapeHTML(get_text_attr(f, 'path'))
                if not path:
                    continue
                formats = self._extract_m3u8_formats(
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -2,8 +2,13 @@

 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
    int_or_none,
    str_to_int,
@@ -12,7 +17,7 @@ from ..utils import (


 class DaumIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P<id>[^?#&]+)'
+    _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
    IE_NAME = 'daum.net'

    _TESTS = [{
@@ -23,25 +28,57 @@ class DaumIE(InfoExtractor):
            'title': '마크 헌트 vs 안토니오 실바',
            'description': 'Mark Hunt vs Antonio Silva',
            'upload_date': '20131217',
+            'thumbnail': 're:^https?://.*\.(?:jpg|png)',
            'duration': 2117,
            'view_count': int,
            'comment_count': int,
        },
+    }, {
+        'url': 'http://m.tvpot.daum.net/v/65139429',
+        'info_dict': {
+            'id': '65139429',
+            'ext': 'mp4',
+            'title': 'md5:a100d65d09cec246d8aa9bde7de45aed',
+            'description': 'md5:79794514261164ff27e36a21ad229fc5',
+            'upload_date': '20150604',
+            'thumbnail': 're:^https?://.*\.(?:jpg|png)',
+            'duration': 154,
+            'view_count': int,
+            'comment_count': int,
+        },
    }, {
        'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
        'only_matching': True,
+    }, {
+        'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
+        'info_dict': {
+            'id': 'vwIpVpCQsT8$',
+            'ext': 'flv',
+            'title': '01-Korean War ( Trouble on the horizon )',
+            'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
+            'upload_date': '20080223',
+            'thumbnail': 're:^https?://.*\.(?:jpg|png)',
+            'duration': 249,
+            'view_count': int,
+            'comment_count': int,
+        },
    }]

    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        video_id = compat_urllib_parse_unquote(self._match_id(url))
        query = compat_urllib_parse.urlencode({'vid': video_id})
-        info = self._download_xml(
-            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
-            'Downloading video info')
        movie_data = self._download_json(
            'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
            video_id, 'Downloading video formats info')

+        # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
+        if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
+            return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
+
+        info = self._download_xml(
+            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
+            'Downloading video info')
+
        formats = []
        for format_el in movie_data['output_list']['output_list']:
            profile = format_el['profile']
@@ -76,7 +113,7 @@ class DaumIE(InfoExtractor):


 class DaumClipIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
    IE_NAME = 'daum.net:clip'

    _TESTS = [{
@@ -87,9 +124,13 @@ class DaumClipIE(InfoExtractor):
            'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
            'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
            'upload_date': '20130831',
+            'thumbnail': 're:^https?://.*\.(?:jpg|png)',
            'duration': 3868,
            'view_count': int,
        },
+    }, {
+        'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@@ -53,8 +53,8 @@ class ESPNIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)

        video_id = self._search_regex(
-            r'class="video-play-button"[^>]+data-id="(\d+)',
-            webpage, 'video id')
+            r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
+            webpage, 'video id', group='id')

        cms = 'espn'
        if 'data-source="intl"' in webpage:
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -150,10 +150,32 @@ class FacebookIE(InfoExtractor):
        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)

+        video_data = None
+
        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
-        if not m:
+        if m:
+            data = dict(json.loads(m.group(1)))
+            params_raw = compat_urllib_parse_unquote(data['params'])
+            video_data = json.loads(params_raw)['video_data']
+
+        def video_data_list2dict(video_data):
+            ret = {}
+            for item in video_data:
+                format_id = item['stream_type']
+                ret.setdefault(format_id, []).append(item)
+            return ret
+
+        if not video_data:
+            server_js_data = self._parse_json(self._search_regex(
+                r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
+            for item in server_js_data['instances']:
+                if item[1][0] == 'VideoConfig':
+                    video_data = video_data_list2dict(item[2][0]['videoData'])
+                    break
+
+        if not video_data:
            m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
            if m_msg is not None:
                raise ExtractorError(
@@ -161,12 +183,9 @@ class FacebookIE(InfoExtractor):
                    expected=True)
            else:
                raise ExtractorError('Cannot parse data')
-        data = dict(json.loads(m.group(1)))
-        params_raw = compat_urllib_parse_unquote(data['params'])
-        params = json.loads(params_raw)

        formats = []
-        for format_id, f in params['video_data'].items():
+        for format_id, f in video_data.items():
            if not f or not isinstance(f, list):
                continue
            for quality in ('sd', 'hd'):
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1819,6 +1819,17 @@ class GenericIE(InfoExtractor):
        if digiteka_url:
            return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())

+        # Look for Limelight embeds
+        mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
+        if mobj:
+            lm = {
+                'Media': 'media',
+                'Channel': 'channel',
+                'ChannelList': 'channel_list',
+            }
+            return self.url_result('limelight:%s:%s' % (
+                lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
+
        # Look for AdobeTVVideo embeds
        mobj = re.search(
            r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -5,11 +5,13 @@ import datetime
 import re
 import time
 import base64
+import hashlib

 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse,
    compat_ord,
+    compat_str,
 )
 from ..utils import (
    determine_ext,
@@ -258,6 +260,7 @@ class LetvCloudIE(InfoExtractor):
        },
    }, {
        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
+        'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
        'info_dict': {
            'id': 'p7jnfw5hw9_ec93197892',
            'ext': 'mp4',
@@ -265,6 +268,7 @@ class LetvCloudIE(InfoExtractor):
        },
    }, {
        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
+        'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
        'info_dict': {
            'id': 'p7jnfw5hw9_187060b6fd',
            'ext': 'mp4',
@@ -272,21 +276,37 @@ class LetvCloudIE(InfoExtractor):
        },
    }]

-    def _real_extract(self, url):
-        uu_mobj = re.search('uu=([\w]+)', url)
-        vu_mobj = re.search('vu=([\w]+)', url)
+    @staticmethod
+    def sign_data(obj):
+        if obj['cf'] == 'flash':
+            salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
+            items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
+        elif obj['cf'] == 'html5':
+            salt = 'fbeh5player12c43eccf2bec3300344'
+            items = ['cf', 'ran', 'uu', 'bver', 'vu']
+        input_data = ''.join([item + obj[item] for item in items]) + salt
+        obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest()

-        if not uu_mobj or not vu_mobj:
-            raise ExtractorError('Invalid URL: %s' % url, expected=True)
+    def _get_formats(self, cf, uu, vu, media_id):
+        def get_play_json(cf, timestamp):
+            data = {
+                'cf': cf,
+                'ver': '2.2',
+                'bver': 'firefox44.0',
+                'format': 'json',
+                'uu': uu,
+                'vu': vu,
+                'ran': compat_str(timestamp),
+            }
+            self.sign_data(data)
+            return self._download_json(
+                'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data),
+                media_id, 'Downloading playJson data for type %s' % cf)

-        uu = uu_mobj.group(1)
-        vu = vu_mobj.group(1)
-        media_id = uu + '_' + vu
-
-        play_json_req = sanitized_Request(
-            'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' +
-            'uu=' + uu + '&vu=' + vu)
-        play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data')
+        play_json = get_play_json(cf, time.time())
+        # The server time may be different from local time
+        if play_json.get('code') == 10071:
+            play_json = get_play_json(cf, play_json['timestamp'])

        if not play_json.get('data'):
            if play_json.get('message'):
@@ -312,6 +332,21 @@ class LetvCloudIE(InfoExtractor):
                'width': int_or_none(play_url.get('vwidth')),
                'height': int_or_none(play_url.get('vheight')),
            })
+
+        return formats
+
+    def _real_extract(self, url):
+        uu_mobj = re.search('uu=([\w]+)', url)
+        vu_mobj = re.search('vu=([\w]+)', url)
+
+        if not uu_mobj or not vu_mobj:
+            raise ExtractorError('Invalid URL: %s' % url, expected=True)
+
+        uu = uu_mobj.group(1)
+        vu = vu_mobj.group(1)
+        media_id = uu + '_' + vu
+
+        formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
        self._sort_formats(formats)

        return {
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -40,7 +40,8 @@ class LimelightBaseIE(InfoExtractor):
            if not stream_url:
                continue
            if '.f4m' in stream_url:
-                formats.extend(self._extract_f4m_formats(stream_url, video_id))
+                formats.extend(self._extract_f4m_formats(
+                    stream_url, video_id, fatal=False))
            else:
                fmt = {
                    'url': stream_url,
@@ -72,8 +73,8 @@ class LimelightBaseIE(InfoExtractor):
            format_id = mobile_url.get('targetMediaPlatform')
            if determine_ext(media_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
-                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    preference=-1, m3u8_id=format_id))
+                    media_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id=format_id, fatal=False))
            else:
                formats.append({
                    'url': media_url,
--- a/youtube_dl/extractor/matchtv.py
+++ b/youtube_dl/extractor/matchtv.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import (
+    sanitized_Request,
+    xpath_text,
+)
+
+
+class MatchTVIE(InfoExtractor):
+    _VALID_URL = r'https?://matchtv\.ru/?#live-player'
+    _TEST = {
+        'url': 'http://matchtv.ru/#live-player',
+        'info_dict': {
+            'id': 'matchtv-live',
+            'ext': 'flv',
+            'title': 're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = 'matchtv-live'
+        request = sanitized_Request(
+            'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse.urlencode({
+                'ts': '',
+                'quality': 'SD',
+                'contentId': '561d2c0df7159b37178b4567',
+                'sign': '',
+                'includeHighlights': '0',
+                'userId': '',
+                'sessionId': random.randint(1, 1000000000),
+                'contentType': 'channel',
+                'timeShift': '0',
+                'platform': 'portal',
+            }),
+            headers={
+                'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
+            })
+        video_url = self._download_json(request, video_id)['data']['videoUrl']
+        f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
+        formats = self._extract_f4m_formats(f4m_url, video_id)
+        return {
+            'id': video_id,
+            'title': self._live_title('Матч ТВ - Прямой эфир'),
+            'is_live': True,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -18,13 +18,17 @@ class NBAIE(InfoExtractor):
        'md5': '9e7729d3010a9c71506fd1248f74e4f4',
        'info_dict': {
            'id': '0021200253-okc-bkn-recap',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Thunder vs. Nets',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'duration': 181,
            'timestamp': 1354638466,
            'upload_date': '20121204',
        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
    }, {
        'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
        'only_matching': True,
@@ -68,7 +72,7 @@ class NBAIE(InfoExtractor):
            if video_url.startswith('/'):
                continue
            if video_url.endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False))
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
            elif video_url.endswith('.f4m'):
                formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
            else:
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -19,32 +19,39 @@ class NBCIE(InfoExtractor):
    _TESTS = [
        {
            'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
-            # md5 checksum is not stable
            'info_dict': {
-                'id': 'c9xnCo0YPOPH',
-                'ext': 'flv',
+                'id': '112966',
+                'ext': 'mp4',
                'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
                'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
        },
        {
            'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
            'info_dict': {
-                'id': 'XwU9KZkp98TH',
+                'id': '176',
                'ext': 'flv',
                'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
                'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
            },
-            'skip': 'Only works from US',
+            'skip': '404 Not Found',
        },
        {
            'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
            'info_dict': {
-                'id': '8iUuyzWDdYUZ',
-                'ext': 'flv',
+                'id': '2832821',
+                'ext': 'mp4',
                'title': 'Star Wars Teaser',
                'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
            'skip': 'Only works from US',
        },
        {
@@ -66,7 +73,11 @@ class NBCIE(InfoExtractor):
            webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
        if theplatform_url.startswith('//'):
            theplatform_url = 'http:' + theplatform_url
-        return self.url_result(smuggle_url(theplatform_url, {'source_url': url}))
+        return {
+            '_type': 'url_transparent',
+            'url': smuggle_url(theplatform_url, {'source_url': url}),
+            'id': video_id,
+        }


 class NBCSportsVPlayerIE(InfoExtractor):
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -193,7 +193,7 @@ class NDREmbedBaseIE(InfoExtractor):
                    src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
-                    src, video_id, m3u8_id='hls', entry_protocol='m3u8_native'))
+                    src, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'))
            else:
                quality = f.get('quality')
                ff = {
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -189,7 +189,7 @@ class NPOIE(NPOBaseIE):
                if not video_url:
                    continue
                if format_id == 'adaptive':
-                    formats.extend(self._extract_m3u8_formats(video_url, video_id))
+                    formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
                else:
                    formats.append({
                        'url': video_url,
@@ -406,6 +406,38 @@ class NPORadioFragmentIE(InfoExtractor):
        }


+class SchoolTVIE(InfoExtractor):
+    IE_NAME = 'schooltv'
+    _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'
+
+    _TEST = {
+        'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
+        'info_dict': {
+            'id': 'WO_NTR_429477',
+            'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
+            'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
+            'ext': 'mp4',
+            'description': 'md5:abfa0ff690adb73fd0297fd033aaa631'
+        },
+        'params': {
+            # Skip because of m3u8 download
+            'skip_download': True
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id')
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'NPO',
+            'url': 'npo:%s' % video_id,
+            'display_id': display_id
+        }
+
+
 class VPROIE(NPOIE):
    IE_NAME = 'vpro'
    _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -133,26 +133,32 @@ class NRKTVIE(InfoExtractor):
    _TESTS = [
        {
            'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
-            'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
            'info_dict': {
                'id': 'MUHH48000314',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': '20 spørsmål',
                'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
                'upload_date': '20140523',
                'duration': 1741.52,
            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
        },
        {
            'url': 'https://tv.nrk.no/program/mdfp15000514',
-            'md5': '383650ece2b25ecec996ad7b5bb2a384',
            'info_dict': {
                'id': 'mdfp15000514',
-                'ext': 'flv',
-                'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
+                'ext': 'mp4',
+                'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
                'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
                'upload_date': '20140524',
-                'duration': 4605.0,
+                'duration': 4605.08,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
            },
        },
        {
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -13,7 +13,7 @@ from ..utils import (


 class OdnoklassnikiIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
+    _VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
    _TESTS = [{
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
@@ -69,6 +69,12 @@ class OdnoklassnikiIE(InfoExtractor):
    }, {
        'url': 'http://www.ok.ru/videoembed/20648036891',
        'only_matching': True,
+    }, {
+        'url': 'http://m.ok.ru/video/20079905452',
+        'only_matching': True,
+    }, {
+        'url': 'http://mobile.ok.ru/video/20079905452',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/screenwavemedia.py
+++ b/youtube_dl/extractor/screenwavemedia.py
@@ -71,7 +71,7 @@ class ScreenwaveMediaIE(InfoExtractor):
        formats = []
        for source in sources:
            if source['type'] == 'hls':
-                formats.extend(self._extract_m3u8_formats(source['file'], video_id))
+                formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4'))
            else:
                file_ = source.get('file')
                if not file_:
@@ -107,7 +107,11 @@ class TeamFourIE(InfoExtractor):
            'upload_date': '20130401',
            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
            'title': 'A Moment With TFS Episode 4',
-        }
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/senateisvp.py
+++ b/youtube_dl/extractor/senateisvp.py
@@ -53,17 +53,25 @@ class SenateISVPIE(InfoExtractor):
        'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
        'info_dict': {
            'id': 'judiciary031715',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Integrated Senate Video Player',
            'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
-        }
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
    }, {
        'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
        'info_dict': {
            'id': 'commerce011514',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Integrated Senate Video Player'
-        }
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
    }, {
        'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
        # checksum differs each time
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -34,11 +34,11 @@ class SpankBangIE(InfoExtractor):
            'ext': 'mp4',
            'format_id': '%sp' % height,
            'height': int(height),
-        } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
+        } for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)]
        self._sort_formats(formats)

        title = self._html_search_regex(
-            r'(?s)<h1>(.+?)</h1>', webpage, 'title')
+            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
        description = self._search_regex(
            r'class="desc"[^>]*>([^<]+)',
            webpage, 'description', default=None)
--- a/youtube_dl/extractor/tv2.py
+++ b/youtube_dl/extractor/tv2.py
@@ -17,18 +17,21 @@ class TV2IE(InfoExtractor):
    _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.tv2.no/v/916509/',
-        'md5': '9cb9e3410b18b515d71892f27856e9b1',
        'info_dict': {
            'id': '916509',
-            'ext': 'flv',
-            'title': 'Se Gryttens hyllest av Steven Gerrard',
+            'ext': 'mp4',
+            'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
            'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
            'timestamp': 1431715610,
            'upload_date': '20150515',
            'duration': 156.967,
            'view_count': int,
            'categories': list,
-        }
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -86,10 +86,9 @@ class VGTVIE(XstreamIE):
        {
            # streamType: wasLive
            'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
-            'md5': '458f4841239dab414343b50e5af8869c',
            'info_dict': {
                'id': '113063',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': 'V75 fra Solvalla 30.05.15',
                'description': 'md5:b3743425765355855f88e096acc93231',
                'thumbnail': 're:^https?://.*\.jpg',
@@ -98,6 +97,10 @@ class VGTVIE(XstreamIE):
                'upload_date': '20150530',
                'view_count': int,
            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
        },
        {
            'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
--- a/youtube_dl/extractor/viidea.py
+++ b/youtube_dl/extractor/viidea.py
@@ -45,6 +45,10 @@ class ViideaIE(InfoExtractor):
            'upload_date': '20130627',
            'duration': 565,
        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
    }, {
        # video with invalid direct format links (HTTP 403)
        'url': 'http://videolectures.net/russir2010_filippova_nlp/',
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -181,7 +181,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            return


-class YoutubeEntryListBaseInfoExtractor(InfoExtractor):
+class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        more_widget_html = content_html = page
@@ -233,7 +233,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):

 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
-        for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content):
+        for playlist_id in set(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')

@@ -1602,7 +1602,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        }


-class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
+class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
@@ -1846,7 +1846,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):

 class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
    IE_NAME = 'youtube:user'

--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -415,6 +415,11 @@ def parseOpts(overrideArguments=None):
        '--hls-prefer-native',
        dest='hls_prefer_native', action='store_true',
        help='Use the native HLS downloader instead of ffmpeg (experimental)')
+    downloader.add_option(
+        '--hls-use-mpegts',
+        dest='hls_use_mpegts', action='store_true',
+        help='Use the mpegts container for HLS videos, allowing to play the '
+             'video while downloading (some players may not be able to play it)')
    downloader.add_option(
        '--external-downloader',
        dest='external_downloader', metavar='COMMAND',
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.01.27'
+__version__ = '2016.01.31'
Author	SHA1	Message	Date
Philipp Hagemeister	a69bee4762	release 2016.01.31	2016-01-31 12:57:18 +01:00
Sergey M․	9acd33094d	[youtube] Filter duplicates in playlists base extractor	2016-01-31 17:52:02 +06:00
Sergey M․	8e7aad2075	[youtube] Use authentication for entry list base extractor (Closes #8380 )	2016-01-31 17:49:59 +06:00
Yen Chi Hsuan	7b7507d6e1	[letv] Fix LetvCloud extraction	2016-01-31 07:15:43 +08:00
Sergey M․	673fb82e65	[schooltv] Improve video id regex	2016-01-31 04:41:18 +06:00
Sergey M	181cf24bc0	Merge pull request #8376 from rrooij/schooltv [schooltv] Add extractor for SchoolTV playlists	2016-01-31 04:36:33 +06:00
rrooij	89f2602880	[schooltv] Add extractor for SchoolTV playlists This closes #8163	2016-01-30 23:21:42 +01:00
Yen Chi Hsuan	db9b1dbcd9	[nba] Add ext for hls formats and fix test_NBA	2016-01-31 04:58:10 +08:00
Yen Chi Hsuan	e881c4bcab	[nbc] Use NBC's id and fix _TESTS ThePlatform URL gives the same ID for all _TESTS	2016-01-31 04:58:10 +08:00
Yen Chi Hsuan	670ad51ade	[nrktv] Fix _TESTS	2016-01-31 04:58:10 +08:00
Yen Chi Hsuan	eb6fc7d32a	[senateisvp] Fix test_SenateISVP and test_SenateISVP_1	2016-01-31 04:58:10 +08:00
Yen Chi Hsuan	ed1a390583	[tv2] Fix test_TV2	2016-01-31 04:58:10 +08:00
Yen Chi Hsuan	809e1857c5	[screenwavemedia] Fix HLS extension and test_TeamFour	2016-01-31 04:58:10 +08:00
Yen Chi Hsuan	7c38af48b9	[vgtv] Fix test_VGTV_2	2016-01-31 04:58:10 +08:00
Yen Chi Hsuan	60ad3eb970	[viidea] Skip download for the test case requiring ffmpeg	2016-01-31 04:58:10 +08:00
Sergey M․	a7685b3a6b	[npo] Add extension for m3u8	2016-01-31 02:38:28 +06:00
remitamine	8f1fddc816	[limelight] fix format sorting and make m3u8 and f4m extraction non fatal	2016-01-30 20:51:47 +01:00
remitamine	1bf996fa5c	[generic] Add support for Limelight API	2016-01-30 20:45:56 +01:00
Sergey M․	b8c9926c0a	[downloader/f4m] Do not update fragment list while test	2016-01-30 19:43:25 +06:00
Sergey M․	2c2f1efdcd	[downloader/fragment] Remove superfluous whitespace	2016-01-30 19:30:31 +06:00
Sergey M․	09104e9930	[downloader/f4m] Add live stream flag to context Now download progress for f4m livestreams is reported correctly	2016-01-30 19:22:15 +06:00
Sergey M․	5fa1702ca6	[downloader/fragment] Do not report total bytes estimation and eta for live streams	2016-01-30 19:20:52 +06:00
Sergey M․	53be8894e4	[options] Add missing closing parenthesis	2016-01-30 18:44:22 +06:00
Sergey M․	c3deacd562	[matchtv] Add extractor (Closes #8313 )	2016-01-30 18:30:27 +06:00
Sergey M․	8ab3fe81d8	[downloader/f4m] Prefer bootstrap url attribute over inline bootstrap info	2016-01-30 18:28:38 +06:00
Yen Chi Hsuan	c140629995	[facebook] Support alternative webpage form Fixes #8371	2016-01-30 19:33:22 +08:00
Jaime Marquínez Ferrándiz	7d106a65ca	Add --hls-use-mpegts option When using the mpegts container hls videos can be played while being downloaded (useful if you are recording a live stream). VLC and mpv play them fine, but QuickTime doesn't.	2016-01-30 12:26:40 +01:00
Yen Chi Hsuan	0179f6a830	[daum] Add 'thumbnail' to all _TESTS	2016-01-30 16:54:14 +08:00
Yen Chi Hsuan	830afe85dc	[daum.net] Support VodPlayer.swf URLs (closes #8173 )	2016-01-30 16:50:13 +08:00
Yen Chi Hsuan	8bf39420b4	Merge remote-tracking branch 'upstream/master'	2016-01-30 16:25:55 +08:00
Yen Chi Hsuan	71d08b3e29	Merge branch 'ping-daum-fix-clip'	2016-01-30 16:25:06 +08:00
Yen Chi Hsuan	06ffa33485	[daum.net] Move the request to ClipInfoXml.do To reduce the number of wasted requests	2016-01-30 16:23:37 +08:00
Yen Chi Hsuan	874e05975b	Merge branch 'daum-fix-clip' of https://github.com/ping/youtube-dl into ping-daum-fix-clip	2016-01-30 16:22:37 +08:00
ping	f5d30d521c	[daum] Fix add view_count, comment_count to test	2016-01-30 11:09:30 +08:00
ping	e047922be0	[daum] Fix copy-paste mistake	2016-01-30 11:04:11 +08:00
Sergey M․	83ab8a79cc	[espn] Improve video id extraction (Closes #8368 )	2016-01-30 01:48:54 +06:00
Sergey M․	350cf045d8	[extractor/common] Restrict checks when auto calculating tbr	2016-01-30 01:47:46 +06:00
Sergey M․	68a0ea15b4	[cspan] Unescape path (Closes #8365 )	2016-01-30 00:26:33 +06:00
Jaime Marquínez Ferrándiz	2b4f5e68d1	[azubu] Add extractor for live streams (closes #8343 )	2016-01-29 15:36:33 +01:00
Philipp Hagemeister	055f417278	release 2016.01.29	2016-01-29 12:20:08 +01:00
Jaime Marquínez Ferrándiz	70029bc348	[youtube:user] Require 'https?://' in the url (fixes #8356 ) It was matching www.youtube.com/embed/WpfukLMe1TM. The generic extractor automatically adds http:// if it's missing.	2016-01-29 11:27:11 +01:00
Sergey M․	1ac6e794cb	[bbc] Add test for #8147	2016-01-28 23:27:48 +06:00
Sergey M․	a853427427	[bbc] Add another description regex	2016-01-28 23:23:13 +06:00
Sergey M․	50e989e263	[bbc] Add another title regex (Closes #8340 )	2016-01-28 23:19:53 +06:00
Sergey M․	10e6ed9341	[ok] Add support for mobile URLs (Closes #8345 )	2016-01-28 22:56:49 +06:00
Sergey M․	38c84acae5	[ndr:embed:base] Add missing ext for m3u8	2016-01-28 22:50:18 +06:00
Yen Chi Hsuan	29f46c2bee	Credit @dyn888 for improving format selection [ci skip]	2016-01-28 22:56:59 +08:00
Yen Chi Hsuan	39c10a2b6e	Merge pull request #8346 from dyn888/dyn888-regex-1 Regex pattern update to match more codecs (fixes #6858)	2016-01-28 22:22:43 +08:00
dyn888	b913348d5f	Test codec with a dot '.' in name selection.	2016-01-28 15:07:33 +01:00
dyn888	b0df5223be	Update YoutubeDL.py	2016-01-28 12:07:15 +01:00
Sergey M․	ed7cd1e859	[cbsnews] Remove unused import	2016-01-28 00:42:04 +06:00
remitamine	f125d9115b	[cbsnews] extract all formats	2016-01-27 19:11:21 +01:00
remitamine	a9d5f12fec	Merge pull request #8328 from remitamine/hls-master-detect [extractor/common] detect media playlist in _extract_m3u8_formats	2016-01-27 18:07:30 +01:00
remitamine	7f32e5dc35	[extractor/common] detect media playlist in _extract_m3u8_formats	2016-01-27 17:53:42 +01:00
Sergey M․	c3111ab34f	[spankbang] Fix title extraction (Closes #8329 )	2016-01-27 21:49:56 +06:00
Sergey M․	9339774af2	[spankbang] Fix formats extraction	2016-01-27 21:49:39 +06:00
Sergey M․	b0d21deda9	[extractor/common] Auto calculate tbr when missing	2016-01-27 21:11:17 +06:00
ping	b6c33fd544	[daum.net] Fixes #8331	2016-01-27 12:48:00 +08:00