mirror of
https://source.netsyms.com/Mirrors/youtube-dl
synced 2026-04-24 13:10:26 +00:00
Compare commits
18 Commits
2013.12.16
...
2013.12.17
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f09828b4e1 | ||
|
|
29eb517403 | ||
|
|
44c471c3b8 | ||
|
|
46374a56b2 | ||
|
|
ec98946ef9 | ||
|
|
fa77b742ac | ||
|
|
8b4e274610 | ||
|
|
d6756d3758 | ||
|
|
11b68f6e1b | ||
|
|
88bb52ee18 | ||
|
|
d90df974c3 | ||
|
|
5c541b2cb7 | ||
|
|
87a28127d2 | ||
|
|
ebce53b3d8 | ||
|
|
83c632dc43 | ||
|
|
ff07a05575 | ||
|
|
f25571ffbf | ||
|
|
f7a6892572 |
@@ -12,6 +12,7 @@ from test.helper import FakeYDL
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
AcademicEarthCourseIE,
|
||||
DailymotionPlaylistIE,
|
||||
DailymotionUserIE,
|
||||
VimeoChannelIE,
|
||||
@@ -158,5 +159,16 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], u'Inspector')
|
||||
self.assertTrue(len(result['entries']) >= 9)
|
||||
|
||||
def test_AcademicEarthCourse(self):
|
||||
dl = FakeYDL()
|
||||
ie = AcademicEarthCourseIE(dl)
|
||||
result = ie.extract(u'http://academicearth.org/courses/building-dynamic-websites/')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], u'building-dynamic-websites')
|
||||
self.assertEqual(result['title'], u'Building Dynamic Websites')
|
||||
self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
|
||||
self.assertEqual(len(result['entries']), 10)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -13,20 +13,21 @@ import xml.etree.ElementTree
|
||||
|
||||
#from youtube_dl.utils import htmlentity_transform
|
||||
from youtube_dl.utils import (
|
||||
timeconvert,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
orderedSet,
|
||||
DateRange,
|
||||
unified_strdate,
|
||||
encodeFilename,
|
||||
find_xpath_attr,
|
||||
get_meta_content,
|
||||
xpath_with_ns,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
orderedSet,
|
||||
sanitize_filename,
|
||||
shell_quote,
|
||||
encodeFilename,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
url_basename,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
@@ -181,6 +182,12 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(str_to_int('123,456'), 123456)
|
||||
self.assertEqual(str_to_int('123.456'), 123456)
|
||||
|
||||
def test_url_basename(self):
|
||||
self.assertEqual(url_basename(u'http://foo.de/'), u'')
|
||||
self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz')
|
||||
self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
|
||||
self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
|
||||
self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -47,6 +47,7 @@ from .utils import (
|
||||
subtitles_filename,
|
||||
takewhile_inclusive,
|
||||
UnavailableVideoError,
|
||||
url_basename,
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLHandler,
|
||||
@@ -484,6 +485,7 @@ class YoutubeDL(object):
|
||||
{
|
||||
'extractor': ie.IE_NAME,
|
||||
'webpage_url': url,
|
||||
'webpage_url_basename': url_basename(url),
|
||||
'extractor_key': ie.ie_key(),
|
||||
})
|
||||
if process:
|
||||
@@ -576,6 +578,7 @@ class YoutubeDL(object):
|
||||
'playlist_index': i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
@@ -596,6 +599,7 @@ class YoutubeDL(object):
|
||||
{
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
})
|
||||
return r
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from .appletrailers import AppleTrailersIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .anitube import AnitubeIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .ard import ARDIE
|
||||
from .arte import (
|
||||
|
||||
36
youtube_dl/extractor/academicearth.py
Normal file
36
youtube_dl/extractor/academicearth.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
|
||||
IE_NAME = u'AcademicEarth:Course'
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
playlist_id = m.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
|
||||
description = self._html_search_regex(
|
||||
r'<p class="excerpt">(.*?)</p>',
|
||||
webpage, u'description', fatal=False)
|
||||
urls = re.findall(
|
||||
r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
|
||||
webpage)
|
||||
entries = [self.url_result(u) for u in urls]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
}
|
||||
@@ -266,20 +266,6 @@ class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
IE_NAME = u'arte.tv:ddc'
|
||||
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
|
||||
u'file': u'049881-009_PLUS7-D.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Mit offenen Karten',
|
||||
u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
|
||||
u'upload_date': u'20131207',
|
||||
},
|
||||
u'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
if lang == 'folge':
|
||||
|
||||
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
)
|
||||
_NO_DEFAULT = object()
|
||||
|
||||
|
||||
class InfoExtractor(object):
|
||||
@@ -281,7 +282,7 @@ class InfoExtractor(object):
|
||||
video_info['title'] = playlist_title
|
||||
return video_info
|
||||
|
||||
def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
|
||||
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
|
||||
"""
|
||||
Perform a regex search on the given string, using a single or a list of
|
||||
patterns returning the first matching group.
|
||||
@@ -295,7 +296,7 @@ class InfoExtractor(object):
|
||||
mobj = re.search(p, string, flags)
|
||||
if mobj: break
|
||||
|
||||
if sys.stderr.isatty() and os.name != 'nt':
|
||||
if os.name != 'nt' and sys.stderr.isatty():
|
||||
_name = u'\033[0;34m%s\033[0m' % name
|
||||
else:
|
||||
_name = name
|
||||
@@ -303,7 +304,7 @@ class InfoExtractor(object):
|
||||
if mobj:
|
||||
# return the first matching group
|
||||
return next(g for g in mobj.groups() if g is not None)
|
||||
elif default is not None:
|
||||
elif default is not _NO_DEFAULT:
|
||||
return default
|
||||
elif fatal:
|
||||
raise RegexNotFoundError(u'Unable to extract %s' % _name)
|
||||
@@ -312,7 +313,7 @@ class InfoExtractor(object):
|
||||
u'please report this issue on http://yt-dl.org/bug' % _name)
|
||||
return None
|
||||
|
||||
def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
|
||||
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
|
||||
"""
|
||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||
"""
|
||||
|
||||
@@ -241,6 +241,12 @@ class GenericIE(InfoExtractor):
|
||||
# Don't set the extractor because it can be a track url or an album
|
||||
return self.url_result(burl)
|
||||
|
||||
# Look for embedded Vevo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
|
||||
@@ -93,7 +93,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
|
||||
class MTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
|
||||
_VALID_URL = r'''(?x)^https?://
|
||||
(?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
|
||||
m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''
|
||||
|
||||
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
|
||||
|
||||
@@ -127,16 +129,17 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Some videos come from Vevo.com
|
||||
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
|
||||
webpage, re.DOTALL)
|
||||
if m_vevo:
|
||||
vevo_id = m_vevo.group(1);
|
||||
self.to_screen(u'Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
|
||||
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
|
||||
uri = mobj.group('mgid')
|
||||
if uri is None:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Some videos come from Vevo.com
|
||||
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
|
||||
webpage, re.DOTALL)
|
||||
if m_vevo:
|
||||
vevo_id = m_vevo.group(1);
|
||||
self.to_screen(u'Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
|
||||
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
|
||||
return self._get_videos_info(uri)
|
||||
|
||||
@@ -15,12 +15,12 @@ class RadioFranceIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
|
||||
u'file': u'one-one.mp4',
|
||||
u'md5': u'todo',
|
||||
u'file': u'one-one.ogg',
|
||||
u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
|
||||
u'info_dict': {
|
||||
u"title": u"One to one",
|
||||
u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
|
||||
u"uploader": u"ferdi",
|
||||
u"uploader": u"Thomas Hercouët",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
class RTLnowIE(InfoExtractor):
|
||||
"""Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
|
||||
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
||||
_VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
||||
_TESTS = [{
|
||||
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
||||
u'file': u'90419.flv',
|
||||
@@ -82,7 +82,7 @@ class RTLnowIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
webpage_url = u'http://' + mobj.group('url')
|
||||
video_page_url = u'http://' + mobj.group('base_url')
|
||||
video_page_url = u'http://' + mobj.group('domain') + u'/'
|
||||
video_id = mobj.group(u'video_id')
|
||||
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
@@ -15,7 +15,12 @@ class VevoIE(InfoExtractor):
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE)
|
||||
"""
|
||||
_VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
|
||||
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
|
||||
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
|
||||
vevo:)
|
||||
(?P<id>[^&?#]+)'''
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
u'file': u'GB1101300280.mp4',
|
||||
|
||||
@@ -32,7 +32,7 @@ class XTubeIE(InfoExtractor):
|
||||
|
||||
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
|
||||
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
|
||||
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
|
||||
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', fatal=False)
|
||||
video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
|
||||
@@ -1361,7 +1361,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
video_description = u''
|
||||
|
||||
def _extract_count(klass):
|
||||
count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
|
||||
count = self._search_regex(
|
||||
r'class="%s">([\d,]+)</span>' % re.escape(klass),
|
||||
video_webpage, klass, default=None)
|
||||
if count is not None:
|
||||
return int(count.replace(',', ''))
|
||||
return None
|
||||
|
||||
@@ -1084,3 +1084,10 @@ def remove_start(s, start):
|
||||
if s.startswith(start):
|
||||
return s[len(start):]
|
||||
return s
|
||||
|
||||
|
||||
def url_basename(url):
|
||||
m = re.match(r'(?:https?:|)//[^/]+/(?:[^/?#]+/)?([^/?#]+)/?(?:[?#]|$)', url)
|
||||
if not m:
|
||||
return u''
|
||||
return m.group(1)
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.12.16.5'
|
||||
__version__ = '2013.12.17.1'
|
||||
|
||||
Reference in New Issue
Block a user