release 2013.12.17.1

Add webpage_url_basename info_dict field (Fixes #1938 )
release 2013.12.17
2026-04-24 04:16:53 +00:00 · 2013-12-17 04:13:41 +01:00 · 2013-12-17 04:13:36 +01:00 · 2013-12-17 02:51:22 +01:00 · 2013-12-17 02:49:56 +01:00 · 2013-12-17 02:41:34 +01:00
11 changed files with 43 additions and 22 deletions
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -166,7 +166,7 @@ class TestPlaylists(unittest.TestCase):
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], u'building-dynamic-websites')
        self.assertEqual(result['title'], u'Building Dynamic Websites')
-        self.assertEqual(result['description'], "Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
+        self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
        self.assertEqual(len(result['entries']), 10)


--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -13,20 +13,21 @@ import xml.etree.ElementTree

 #from youtube_dl.utils import htmlentity_transform
 from youtube_dl.utils import (
-    timeconvert,
-    sanitize_filename,
-    unescapeHTML,
-    orderedSet,
    DateRange,
-    unified_strdate,
+    encodeFilename,
    find_xpath_attr,
    get_meta_content,
-    xpath_with_ns,
-    smuggle_url,
-    unsmuggle_url,
+    orderedSet,
+    sanitize_filename,
    shell_quote,
-    encodeFilename,
+    smuggle_url,
    str_to_int,
+    timeconvert,
+    unescapeHTML,
+    unified_strdate,
+    unsmuggle_url,
+    url_basename,
+    xpath_with_ns,
 )

 if sys.version_info < (3, 0):
@@ -181,6 +182,12 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(str_to_int('123,456'), 123456)
        self.assertEqual(str_to_int('123.456'), 123456)

+    def test_url_basename(self):
+        self.assertEqual(url_basename(u'http://foo.de/'), u'')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -47,6 +47,7 @@ from .utils import (
    subtitles_filename,
    takewhile_inclusive,
    UnavailableVideoError,
+    url_basename,
    write_json_file,
    write_string,
    YoutubeDLHandler,
@@ -484,6 +485,7 @@ class YoutubeDL(object):
                    {
                        'extractor': ie.IE_NAME,
                        'webpage_url': url,
+                        'webpage_url_basename': url_basename(url),
                        'extractor_key': ie.ie_key(),
                    })
                if process:
@@ -576,6 +578,7 @@ class YoutubeDL(object):
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
+                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

@@ -596,6 +599,7 @@ class YoutubeDL(object):
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
+                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
--- a/youtube_dl/extractor/academicearth.py
+++ b/youtube_dl/extractor/academicearth.py
@@ -9,7 +9,7 @@ from ..utils import (


 class AcademicEarthCourseIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/courses/(?P<id>[^?#/]+)'
+    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
    IE_NAME = u'AcademicEarth:Course'

    def _real_extract(self, url):
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -18,6 +18,7 @@ from ..utils import (
    sanitize_filename,
    unescapeHTML,
 )
+_NO_DEFAULT = object()


 class InfoExtractor(object):
@@ -281,7 +282,7 @@ class InfoExtractor(object):
            video_info['title'] = playlist_title
        return video_info

-    def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
        """
        Perform a regex search on the given string, using a single or a list of
        patterns returning the first matching group.
@@ -303,7 +304,7 @@ class InfoExtractor(object):
        if mobj:
            # return the first matching group
            return next(g for g in mobj.groups() if g is not None)
-        elif default is not None:
+        elif default is not _NO_DEFAULT:
            return default
        elif fatal:
            raise RegexNotFoundError(u'Unable to extract %s' % _name)
@@ -312,7 +313,7 @@ class InfoExtractor(object):
                u'please report this issue on http://yt-dl.org/bug' % _name)
            return None

-    def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
        """
        Like _search_regex, but strips HTML tags and unescapes entities.
        """
--- a/youtube_dl/extractor/radiofrance.py
+++ b/youtube_dl/extractor/radiofrance.py
@@ -15,12 +15,12 @@ class RadioFranceIE(InfoExtractor):

    _TEST = {
        u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
-        u'file': u'one-one.mp4',
-        u'md5': u'todo',
+        u'file': u'one-one.ogg',
+        u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
        u'info_dict': {
            u"title": u"One to one",
            u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
-            u"uploader": u"ferdi",
+            u"uploader": u"Thomas Hercouët",
        },
    }

--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -10,7 +10,7 @@ from ..utils import (

 class RTLnowIE(InfoExtractor):
    """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [{
        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
        u'file': u'90419.flv',
@@ -82,7 +82,7 @@ class RTLnowIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)

        webpage_url = u'http://' + mobj.group('url')
-        video_page_url = u'http://' + mobj.group('base_url')
+        video_page_url = u'http://' + mobj.group('domain') + u'/'
        video_id = mobj.group(u'video_id')

        webpage = self._download_webpage(webpage_url, video_id)
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -32,7 +32,7 @@ class XTubeIE(InfoExtractor):

        video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
        video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
-        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
+        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', fatal=False)
        video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1361,7 +1361,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                video_description = u''

        def _extract_count(klass):
-            count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
+            count = self._search_regex(
+                r'class="%s">([\d,]+)</span>' % re.escape(klass),
+                video_webpage, klass, default=None)
            if count is not None:
                return int(count.replace(',', ''))
            return None
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1084,3 +1084,10 @@ def remove_start(s, start):
    if s.startswith(start):
        return s[len(start):]
    return s
+
+
+def url_basename(url):
+    m = re.match(r'(?:https?:|)//[^/]+/(?:[^/?#]+/)?([^/?#]+)/?(?:[?#]|$)', url)
+    if not m:
+        return u''
+    return m.group(1)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.12.16.7'
+__version__ = '2013.12.17.1'
Author	SHA1	Message	Date
Philipp Hagemeister	f09828b4e1	release 2013.12.17.1	2013-12-17 04:13:41 +01:00
Philipp Hagemeister	29eb517403	Add webpage_url_basename info_dict field (Fixes #1938 )	2013-12-17 04:13:36 +01:00
Philipp Hagemeister	44c471c3b8	release 2013.12.17	2013-12-17 02:51:22 +01:00
Philipp Hagemeister	46374a56b2	[youtube] Do not warn for videos with allow_rating=0. This fixes #1982. Test video: http://www.youtube.com/watch?v=gi2uH3YxohU	2013-12-17 02:49:56 +01:00
Philipp Hagemeister	ec98946ef9	[academicearth] Support playlists (Closes #1976 )	2013-12-17 02:41:34 +01:00
Philipp Hagemeister	fa77b742ac	[radiofrance] Fill in test details	2013-12-16 23:07:57 +01:00
Philipp Hagemeister	8b4e274610	[rtlnow] Fix URL calculation (Closes #1989 )	2013-12-16 22:28:52 +01:00
Philipp Hagemeister	d6756d3758	[playlist-test] require a string	2013-12-16 22:25:02 +01:00