1
0
mirror of https://source.netsyms.com/Mirrors/youtube-dl synced 2026-04-24 13:10:26 +00:00

Compare commits

...

18 Commits

Author SHA1 Message Date
Philipp Hagemeister
f09828b4e1 release 2013.12.17.1 2013-12-17 04:13:41 +01:00
Philipp Hagemeister
29eb517403 Add webpage_url_basename info_dict field (Fixes #1938) 2013-12-17 04:13:36 +01:00
Philipp Hagemeister
44c471c3b8 release 2013.12.17 2013-12-17 02:51:22 +01:00
Philipp Hagemeister
46374a56b2 [youtube] Do not warn for videos with allow_rating=0
This fixes #1982
Test video: http://www.youtube.com/watch?v=gi2uH3YxohU
2013-12-17 02:49:56 +01:00
Philipp Hagemeister
ec98946ef9 [academicearth] Support playlists (Closes #1976) 2013-12-17 02:41:34 +01:00
Philipp Hagemeister
fa77b742ac [radiofrance] Fill in test details 2013-12-16 23:07:57 +01:00
Philipp Hagemeister
8b4e274610 [rtlnow] Fix URL calculation (Closes #1989) 2013-12-16 22:28:52 +01:00
Philipp Hagemeister
d6756d3758 [playlist-test] require a string 2013-12-16 22:25:02 +01:00
Philipp Hagemeister
11b68f6e1b release 2013.12.16.7 2013-12-16 22:18:58 +01:00
Philipp Hagemeister
88bb52ee18 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-16 22:18:37 +01:00
Philipp Hagemeister
d90df974c3 [academicearth] Add support for courses (#1976) 2013-12-16 22:18:27 +01:00
Jaime Marquínez Ferrándiz
5c541b2cb7 [mtv] Add support for urls from the mobile site (fixes #1959) 2013-12-16 22:05:28 +01:00
Itay Brandes
87a28127d2 _search_regex's "isatty" call fails with Py2exe's
_search_regex calls the sys.stderr.isatty() function for unix systems.

Py2exe uses a custom Stderr() stream which doesn't have an `isatty()`
function, leading to its crash.

Fixed easily by checking that it's a unix system first.
2013-12-16 21:50:26 +01:00
Philipp Hagemeister
ebce53b3d8 [vevo] Add support for videoplayer. URLs (#1957) 2013-12-16 21:48:38 +01:00
Philipp Hagemeister
83c632dc43 release 2013.12.16.6 2013-12-16 21:46:16 +01:00
Philipp Hagemeister
ff07a05575 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-16 21:46:11 +01:00
Philipp Hagemeister
f25571ffbf Add support for embedded vevo player (Fixes #1957) 2013-12-16 21:45:21 +01:00
Jaime Marquínez Ferrándiz
f7a6892572 [arte:ddc] Remove test
video seems to expire in 7 days, as arte+7
2013-12-16 21:42:41 +01:00
16 changed files with 120 additions and 50 deletions

View File

@@ -12,6 +12,7 @@ from test.helper import FakeYDL
from youtube_dl.extractor import (
AcademicEarthCourseIE,
DailymotionPlaylistIE,
DailymotionUserIE,
VimeoChannelIE,
@@ -158,5 +159,16 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Inspector')
self.assertTrue(len(result['entries']) >= 9)
def test_AcademicEarthCourse(self):
    # Extract a known course page and check the playlist metadata it yields.
    course_url = u'http://academicearth.org/courses/building-dynamic-websites/'
    extractor = AcademicEarthCourseIE(FakeYDL())
    playlist = extractor.extract(course_url)

    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], u'building-dynamic-websites')
    self.assertEqual(playlist['title'], u'Building Dynamic Websites')
    self.assertEqual(
        playlist['description'],
        u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
    # The course is expected to list exactly ten lectures.
    self.assertEqual(len(playlist['entries']), 10)
if __name__ == '__main__':
unittest.main()

View File

@@ -13,20 +13,21 @@ import xml.etree.ElementTree
#from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import (
timeconvert,
sanitize_filename,
unescapeHTML,
orderedSet,
DateRange,
unified_strdate,
encodeFilename,
find_xpath_attr,
get_meta_content,
xpath_with_ns,
smuggle_url,
unsmuggle_url,
orderedSet,
sanitize_filename,
shell_quote,
encodeFilename,
smuggle_url,
str_to_int,
timeconvert,
unescapeHTML,
unified_strdate,
unsmuggle_url,
url_basename,
xpath_with_ns,
)
if sys.version_info < (3, 0):
@@ -181,6 +182,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
def test_url_basename(self):
    # Each pair is (input URL, expected last path component).
    expectations = (
        (u'http://foo.de/', u''),
        (u'http://foo.de/bar/baz', u'baz'),
        (u'http://foo.de/bar/baz?x=y', u'baz'),
        (u'http://foo.de/bar/baz#x=y', u'baz'),
        (u'http://foo.de/bar/baz/', u'baz'),
    )
    for candidate_url, expected_basename in expectations:
        self.assertEqual(url_basename(candidate_url), expected_basename)
if __name__ == '__main__':
unittest.main()

View File

@@ -47,6 +47,7 @@ from .utils import (
subtitles_filename,
takewhile_inclusive,
UnavailableVideoError,
url_basename,
write_json_file,
write_string,
YoutubeDLHandler,
@@ -484,6 +485,7 @@ class YoutubeDL(object):
{
'extractor': ie.IE_NAME,
'webpage_url': url,
'webpage_url_basename': url_basename(url),
'extractor_key': ie.ie_key(),
})
if process:
@@ -576,6 +578,7 @@ class YoutubeDL(object):
'playlist_index': i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
}
@@ -596,6 +599,7 @@ class YoutubeDL(object):
{
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
})
return r

View File

@@ -1,6 +1,7 @@
from .appletrailers import AppleTrailersIE
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .anitube import AnitubeIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE
from .ard import ARDIE
from .arte import (

View File

@@ -0,0 +1,36 @@
import datetime
import json
import re
from .common import InfoExtractor
from ..utils import (
remove_start,
)
class AcademicEarthCourseIE(InfoExtractor):
    """Playlist extractor for academicearth.org course and playlist pages."""

    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
    IE_NAME = u'AcademicEarth:Course'

    def _real_extract(self, url):
        """Build a playlist result from the lecture links on a course page.

        The playlist id is the ``id`` group of ``_VALID_URL``. The title is
        required; the description is looked up with ``fatal=False``. Every
        lecture link found on the page is passed through ``self.url_result``.
        """
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, playlist_id)

        title = self._html_search_regex(
            r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
        description = self._html_search_regex(
            r'<p class="excerpt">(.*?)</p>',
            webpage, u'description', fatal=False)

        # One anchor per lecture; the href is the lecture's own page URL.
        lecture_links = re.findall(
            r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
            webpage)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': title,
            'description': description,
            'entries': [self.url_result(link) for link in lecture_links],
        }

View File

@@ -266,20 +266,6 @@ class ArteTVDDCIE(ArteTVPlus7IE):
IE_NAME = u'arte.tv:ddc'
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
_TEST = {
u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
u'file': u'049881-009_PLUS7-D.flv',
u'info_dict': {
u'title': u'Mit offenen Karten',
u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
u'upload_date': u'20131207',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _real_extract(self, url):
video_id, lang = self._extract_url_info(url)
if lang == 'folge':

View File

@@ -18,6 +18,7 @@ from ..utils import (
sanitize_filename,
unescapeHTML,
)
_NO_DEFAULT = object()
class InfoExtractor(object):
@@ -281,7 +282,7 @@ class InfoExtractor(object):
video_info['title'] = playlist_title
return video_info
def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
"""
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
@@ -295,7 +296,7 @@ class InfoExtractor(object):
mobj = re.search(p, string, flags)
if mobj: break
if sys.stderr.isatty() and os.name != 'nt':
if os.name != 'nt' and sys.stderr.isatty():
_name = u'\033[0;34m%s\033[0m' % name
else:
_name = name
@@ -303,7 +304,7 @@ class InfoExtractor(object):
if mobj:
# return the first matching group
return next(g for g in mobj.groups() if g is not None)
elif default is not None:
elif default is not _NO_DEFAULT:
return default
elif fatal:
raise RegexNotFoundError(u'Unable to extract %s' % _name)
@@ -312,7 +313,7 @@ class InfoExtractor(object):
u'please report this issue on http://yt-dl.org/bug' % _name)
return None
def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
"""
Like _search_regex, but strips HTML tags and unescapes entities.
"""

View File

@@ -241,6 +241,12 @@ class GenericIE(InfoExtractor):
# Don't set the extractor because it can be a track url or an album
return self.url_result(burl)
# Look for embedded Vevo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'))
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:

View File

@@ -93,7 +93,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_VALID_URL = r'''(?x)^https?://
(?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
@@ -127,16 +129,17 @@ class MTVIE(MTVServicesInfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
webpage = self._download_webpage(url, video_id)
# Some videos come from Vevo.com
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
webpage, re.DOTALL)
if m_vevo:
vevo_id = m_vevo.group(1);
self.to_screen(u'Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
uri = mobj.group('mgid')
if uri is None:
webpage = self._download_webpage(url, video_id)
# Some videos come from Vevo.com
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
webpage, re.DOTALL)
if m_vevo:
vevo_id = m_vevo.group(1);
self.to_screen(u'Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
return self._get_videos_info(uri)

View File

@@ -15,12 +15,12 @@ class RadioFranceIE(InfoExtractor):
_TEST = {
u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
u'file': u'one-one.mp4',
u'md5': u'todo',
u'file': u'one-one.ogg',
u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
u'info_dict': {
u"title": u"One to one",
u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
u"uploader": u"ferdi",
u"uploader": u"Thomas Hercouët",
},
}

View File

@@ -10,7 +10,7 @@ from ..utils import (
class RTLnowIE(InfoExtractor):
"""Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_TESTS = [{
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
u'file': u'90419.flv',
@@ -82,7 +82,7 @@ class RTLnowIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
webpage_url = u'http://' + mobj.group('url')
video_page_url = u'http://' + mobj.group('base_url')
video_page_url = u'http://' + mobj.group('domain') + u'/'
video_id = mobj.group(u'video_id')
webpage = self._download_webpage(webpage_url, video_id)

View File

@@ -15,7 +15,12 @@ class VevoIE(InfoExtractor):
Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE)
"""
_VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
_VALID_URL = r'''(?x)
(?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
vevo:)
(?P<id>[^&?#]+)'''
_TESTS = [{
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4',

View File

@@ -32,7 +32,7 @@ class XTubeIE(InfoExtractor):
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', fatal=False)
video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]

View File

@@ -1361,7 +1361,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_description = u''
def _extract_count(klass):
count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
count = self._search_regex(
r'class="%s">([\d,]+)</span>' % re.escape(klass),
video_webpage, klass, default=None)
if count is not None:
return int(count.replace(',', ''))
return None

View File

@@ -1084,3 +1084,10 @@ def remove_start(s, start):
if s.startswith(start):
return s[len(start):]
return s
def url_basename(url):
    """Return the last path component of an http(s) or protocol-relative URL.

    Any query string, fragment and single trailing slash are ignored, and
    the path may be arbitrarily deep (``http://host/a/b/c`` gives ``c``).

    Returns ``u''`` when the URL has no path component or does not look like
    ``http://``, ``https://`` or ``//`` followed by a host.
    """
    # ``(?:[^/?#]+/)*`` skips any number of leading directory segments; the
    # previous ``?`` quantifier allowed only one, so deeper paths returned u''.
    m = re.match(
        r'(?:https?:|)//[^/]+/(?:[^/?#]+/)*([^/?#]+)/?(?:[?#]|$)', url)
    if not m:
        return u''
    return m.group(1)

View File

@@ -1,2 +1,2 @@
__version__ = '2013.12.16.5'
__version__ = '2013.12.17.1'