release 2013.12.17.1

Add webpage_url_basename info_dict field (Fixes #1938 )
release 2013.12.17
2026-04-24 13:10:26 +00:00 · 2013-12-17 04:13:41 +01:00 · 2013-12-17 04:13:36 +01:00 · 2013-12-17 02:51:22 +01:00 · 2013-12-17 02:49:56 +01:00 · 2013-12-17 02:41:34 +01:00
16 changed files with 120 additions and 50 deletions
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -12,6 +12,7 @@ from test.helper import FakeYDL


 from youtube_dl.extractor import (
+    AcademicEarthCourseIE,
    DailymotionPlaylistIE,
    DailymotionUserIE,
    VimeoChannelIE,
@@ -158,5 +159,16 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], u'Inspector')
        self.assertTrue(len(result['entries']) >= 9)

+    def test_AcademicEarthCourse(self):
+        dl = FakeYDL()
+        ie = AcademicEarthCourseIE(dl)
+        result = ie.extract(u'http://academicearth.org/courses/building-dynamic-websites/')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'building-dynamic-websites')
+        self.assertEqual(result['title'], u'Building Dynamic Websites')
+        self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
+        self.assertEqual(len(result['entries']), 10)
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -13,20 +13,21 @@ import xml.etree.ElementTree

 #from youtube_dl.utils import htmlentity_transform
 from youtube_dl.utils import (
-    timeconvert,
-    sanitize_filename,
-    unescapeHTML,
-    orderedSet,
    DateRange,
-    unified_strdate,
+    encodeFilename,
    find_xpath_attr,
    get_meta_content,
-    xpath_with_ns,
-    smuggle_url,
-    unsmuggle_url,
+    orderedSet,
+    sanitize_filename,
    shell_quote,
-    encodeFilename,
+    smuggle_url,
    str_to_int,
+    timeconvert,
+    unescapeHTML,
+    unified_strdate,
+    unsmuggle_url,
+    url_basename,
+    xpath_with_ns,
 )

 if sys.version_info < (3, 0):
@@ -181,6 +182,12 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(str_to_int('123,456'), 123456)
        self.assertEqual(str_to_int('123.456'), 123456)

+    def test_url_basename(self):
+        self.assertEqual(url_basename(u'http://foo.de/'), u'')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -47,6 +47,7 @@ from .utils import (
    subtitles_filename,
    takewhile_inclusive,
    UnavailableVideoError,
+    url_basename,
    write_json_file,
    write_string,
    YoutubeDLHandler,
@@ -484,6 +485,7 @@ class YoutubeDL(object):
                    {
                        'extractor': ie.IE_NAME,
                        'webpage_url': url,
+                        'webpage_url_basename': url_basename(url),
                        'extractor_key': ie.ie_key(),
                    })
                if process:
@@ -576,6 +578,7 @@ class YoutubeDL(object):
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
+                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

@@ -596,6 +599,7 @@ class YoutubeDL(object):
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
+                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -1,6 +1,7 @@
-from .appletrailers import AppleTrailersIE
+from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .anitube import AnitubeIE
+from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
 from .arte import (
--- a/youtube_dl/extractor/academicearth.py
+++ b/youtube_dl/extractor/academicearth.py
@@ -0,0 +1,36 @@
+import datetime
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    remove_start,
+)
+
+
+class AcademicEarthCourseIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
+    IE_NAME = u'AcademicEarth:Course'
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        playlist_id = m.group('id')
+
+        webpage = self._download_webpage(url, playlist_id)
+        title = self._html_search_regex(
+            r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
+        description = self._html_search_regex(
+            r'<p class="excerpt">(.*?)</p>',
+            webpage, u'description', fatal=False)
+        urls = re.findall(
+            r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
+            webpage)
+        entries = [self.url_result(u) for u in urls]
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': title,
+            'description': description,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -266,20 +266,6 @@ class ArteTVDDCIE(ArteTVPlus7IE):
    IE_NAME = u'arte.tv:ddc'
    _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'

-    _TEST = {
-        u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
-        u'file': u'049881-009_PLUS7-D.flv',
-        u'info_dict': {
-            u'title': u'Mit offenen Karten',
-            u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
-            u'upload_date': u'20131207',
-        },
-        u'params': {
-            # rtmp download
-            u'skip_download': True,
-        },
-    }
-
    def _real_extract(self, url):
        video_id, lang = self._extract_url_info(url)
        if lang == 'folge':
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -18,6 +18,7 @@ from ..utils import (
    sanitize_filename,
    unescapeHTML,
 )
+_NO_DEFAULT = object()


 class InfoExtractor(object):
@@ -281,7 +282,7 @@ class InfoExtractor(object):
            video_info['title'] = playlist_title
        return video_info

-    def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
        """
        Perform a regex search on the given string, using a single or a list of
        patterns returning the first matching group.
@@ -295,7 +296,7 @@ class InfoExtractor(object):
                mobj = re.search(p, string, flags)
                if mobj: break

-        if sys.stderr.isatty() and os.name != 'nt':
+        if os.name != 'nt' and sys.stderr.isatty():
            _name = u'\033[0;34m%s\033[0m' % name
        else:
            _name = name
@@ -303,7 +304,7 @@ class InfoExtractor(object):
        if mobj:
            # return the first matching group
            return next(g for g in mobj.groups() if g is not None)
-        elif default is not None:
+        elif default is not _NO_DEFAULT:
            return default
        elif fatal:
            raise RegexNotFoundError(u'Unable to extract %s' % _name)
@@ -312,7 +313,7 @@ class InfoExtractor(object):
                u'please report this issue on http://yt-dl.org/bug' % _name)
            return None

-    def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
        """
        Like _search_regex, but strips HTML tags and unescapes entities.
        """
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -241,6 +241,12 @@ class GenericIE(InfoExtractor):
            # Don't set the extractor because it can be a track url or an album
            return self.url_result(burl)

+        # Look for embedded Vevo player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -93,7 +93,9 @@ class MTVServicesInfoExtractor(InfoExtractor):


 class MTVIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
+    _VALID_URL = r'''(?x)^https?://
+        (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
+           m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''

    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'

@@ -127,16 +129,17 @@ class MTVIE(MTVServicesInfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
-
-        webpage = self._download_webpage(url, video_id)
-
-        # Some videos come from Vevo.com
-        m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
-                           webpage, re.DOTALL)
-        if m_vevo:
-            vevo_id = m_vevo.group(1);
-            self.to_screen(u'Vevo video detected: %s' % vevo_id)
-            return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
-
-        uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
+        uri = mobj.group('mgid')
+        if uri is None:
+            webpage = self._download_webpage(url, video_id)
+    
+            # Some videos come from Vevo.com
+            m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
+                               webpage, re.DOTALL)
+            if m_vevo:
+                vevo_id = m_vevo.group(1);
+                self.to_screen(u'Vevo video detected: %s' % vevo_id)
+                return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+    
+            uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
        return self._get_videos_info(uri)
--- a/youtube_dl/extractor/radiofrance.py
+++ b/youtube_dl/extractor/radiofrance.py
@@ -15,12 +15,12 @@ class RadioFranceIE(InfoExtractor):

    _TEST = {
        u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
-        u'file': u'one-one.mp4',
-        u'md5': u'todo',
+        u'file': u'one-one.ogg',
+        u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
        u'info_dict': {
            u"title": u"One to one",
            u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
-            u"uploader": u"ferdi",
+            u"uploader": u"Thomas Hercouët",
        },
    }

--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -10,7 +10,7 @@ from ..utils import (

 class RTLnowIE(InfoExtractor):
    """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [{
        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
        u'file': u'90419.flv',
@@ -82,7 +82,7 @@ class RTLnowIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)

        webpage_url = u'http://' + mobj.group('url')
-        video_page_url = u'http://' + mobj.group('base_url')
+        video_page_url = u'http://' + mobj.group('domain') + u'/'
        video_id = mobj.group(u'video_id')

        webpage = self._download_webpage(webpage_url, video_id)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -15,7 +15,12 @@ class VevoIE(InfoExtractor):
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE)
    """
-    _VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
+    _VALID_URL = r'''(?x)
+        (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
+           https?://cache\.vevo\.com/m/html/embed\.html\?video=|
+           https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
+           vevo:)
+        (?P<id>[^&?#]+)'''
    _TESTS = [{
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -32,7 +32,7 @@ class XTubeIE(InfoExtractor):

        video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
        video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
-        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
+        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', fatal=False)
        video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1361,7 +1361,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                video_description = u''

        def _extract_count(klass):
-            count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
+            count = self._search_regex(
+                r'class="%s">([\d,]+)</span>' % re.escape(klass),
+                video_webpage, klass, default=None)
            if count is not None:
                return int(count.replace(',', ''))
            return None
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1084,3 +1084,10 @@ def remove_start(s, start):
    if s.startswith(start):
        return s[len(start):]
    return s
+
+
+def url_basename(url):
+    m = re.match(r'(?:https?:|)//[^/]+/(?:[^/?#]+/)?([^/?#]+)/?(?:[?#]|$)', url)
+    if not m:
+        return u''
+    return m.group(1)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.12.16.5'
+__version__ = '2013.12.17.1'
Author	SHA1	Message	Date
Philipp Hagemeister	f09828b4e1	release 2013.12.17.1	2013-12-17 04:13:41 +01:00
Philipp Hagemeister	29eb517403	Add webpage_url_basename info_dict field (Fixes #1938 )	2013-12-17 04:13:36 +01:00
Philipp Hagemeister	44c471c3b8	release 2013.12.17	2013-12-17 02:51:22 +01:00
Philipp Hagemeister	46374a56b2	[youtube] Do not warn for videos with allow_rating=0. This fixes #1982. Test video: http://www.youtube.com/watch?v=gi2uH3YxohU	2013-12-17 02:49:56 +01:00
Philipp Hagemeister	ec98946ef9	[academicearth] Support playlists (Closes #1976 )	2013-12-17 02:41:34 +01:00
Philipp Hagemeister	fa77b742ac	[radiofrance] Fill in test details	2013-12-16 23:07:57 +01:00
Philipp Hagemeister	8b4e274610	[rtlnow] Fix URL calculation (Closes #1989 )	2013-12-16 22:28:52 +01:00
Philipp Hagemeister	d6756d3758	[playlist-test] require a string	2013-12-16 22:25:02 +01:00
Philipp Hagemeister	11b68f6e1b	release 2013.12.16.7	2013-12-16 22:18:58 +01:00
Philipp Hagemeister	88bb52ee18	Merge branch 'master' of github.com:rg3/youtube-dl	2013-12-16 22:18:37 +01:00
Philipp Hagemeister	d90df974c3	[academicearth] Add support for courses (#1976 )	2013-12-16 22:18:27 +01:00
Jaime Marquínez Ferrándiz	5c541b2cb7	[mtv] Add support for urls from the mobile site (fixes #1959 )	2013-12-16 22:05:28 +01:00
Itay Brandes	87a28127d2	_search_regex's "isatty" call fails with Py2exe: _search_regex calls the sys.stderr.isatty() function on Unix systems. Py2exe uses a custom Stderr() stream which doesn't have an `isatty()` function, leading to a crash. Easily fixed by checking that it's a Unix system first.	2013-12-16 21:50:26 +01:00
Philipp Hagemeister	ebce53b3d8	[vevo] Add support for videoplayer. URLs (#1957 )	2013-12-16 21:48:38 +01:00
Philipp Hagemeister	83c632dc43	release 2013.12.16.6	2013-12-16 21:46:16 +01:00
Philipp Hagemeister	ff07a05575	Merge branch 'master' of github.com:rg3/youtube-dl	2013-12-16 21:46:11 +01:00
Philipp Hagemeister	f25571ffbf	Add support for embedded vevo player (Fixes #1957 )	2013-12-16 21:45:21 +01:00
Jaime Marquínez Ferrándiz	f7a6892572	[arte:ddc] Remove test; the video seems to expire in 7 days, as with arte+7	2013-12-16 21:42:41 +01:00