mirror of
https://source.netsyms.com/Mirrors/youtube-dl
synced 2026-05-07 05:48:16 +00:00
Compare commits
53 Commits
2013.12.11
...
2013.12.16
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
83c632dc43 | ||
|
|
ff07a05575 | ||
|
|
f25571ffbf | ||
|
|
f7a6892572 | ||
|
|
8fe56478f8 | ||
|
|
0e2a436dce | ||
|
|
24050dd11c | ||
|
|
8c8e3eec79 | ||
|
|
7ebc9dee69 | ||
|
|
ee3e63e477 | ||
|
|
e9c424c144 | ||
|
|
0a9ce268ba | ||
|
|
4b2da48ea7 | ||
|
|
e64eaaa97d | ||
|
|
780603027f | ||
|
|
00902cd601 | ||
|
|
d67b0b1596 | ||
|
|
d7dda16888 | ||
|
|
a19fd00cc4 | ||
|
|
d66152a898 | ||
|
|
8c5f0c9fbc | ||
|
|
6888a874a1 | ||
|
|
09dacfa57f | ||
|
|
b2ae513586 | ||
|
|
e4a0489f6e | ||
|
|
b83be81d27 | ||
|
|
6f5dcd4eee | ||
|
|
1bb2fc98e0 | ||
|
|
e3946f989e | ||
|
|
8863d0de91 | ||
|
|
7b6fefc9d4 | ||
|
|
525ef9227f | ||
|
|
c0ba0f4859 | ||
|
|
b466b7029d | ||
|
|
fa3ae234e0 | ||
|
|
48462108f3 | ||
|
|
f8b56e95b8 | ||
|
|
5fe18bdbde | ||
|
|
dca02c80bc | ||
|
|
9ee859b683 | ||
|
|
8e05c870b4 | ||
|
|
5d574e143f | ||
|
|
2a203a6cda | ||
|
|
dadb8184e4 | ||
|
|
7a563df90a | ||
|
|
24b173fa5c | ||
|
|
9b17ba0fa5 | ||
|
|
211f555d4c | ||
|
|
4d2ebb6bd7 | ||
|
|
df53747436 | ||
|
|
f2c36ee43e | ||
|
|
00381b4ccb | ||
|
|
df1d7da2af |
@@ -56,6 +56,10 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--date DATE download only videos uploaded in this date
|
--date DATE download only videos uploaded in this date
|
||||||
--datebefore DATE download only videos uploaded before this date
|
--datebefore DATE download only videos uploaded before this date
|
||||||
--dateafter DATE download only videos uploaded after this date
|
--dateafter DATE download only videos uploaded after this date
|
||||||
|
--min-views COUNT Do not download any videos with less than COUNT
|
||||||
|
views
|
||||||
|
--max-views COUNT Do not download any videos with more than COUNT
|
||||||
|
views
|
||||||
--no-playlist download only the currently playing video
|
--no-playlist download only the currently playing video
|
||||||
--age-limit YEARS download only videos suitable for the given age
|
--age-limit YEARS download only videos suitable for the given age
|
||||||
--download-archive FILE Download only videos not listed in the archive
|
--download-archive FILE Download only videos not listed in the archive
|
||||||
@@ -127,6 +131,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--get-id simulate, quiet but print id
|
--get-id simulate, quiet but print id
|
||||||
--get-thumbnail simulate, quiet but print thumbnail URL
|
--get-thumbnail simulate, quiet but print thumbnail URL
|
||||||
--get-description simulate, quiet but print video description
|
--get-description simulate, quiet but print video description
|
||||||
|
--get-duration simulate, quiet but print video length
|
||||||
--get-filename simulate, quiet but print output filename
|
--get-filename simulate, quiet but print output filename
|
||||||
--get-format simulate, quiet but print output format
|
--get-format simulate, quiet but print output format
|
||||||
-j, --dump-json simulate, quiet but print JSON information
|
-j, --dump-json simulate, quiet but print JSON information
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
from test.helper import get_testcases
|
from test.helper import get_testcases
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
|
FacebookIE,
|
||||||
gen_extractors,
|
gen_extractors,
|
||||||
JustinTVIE,
|
JustinTVIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
@@ -87,12 +88,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
|
||||||
|
def test_facebook_matching(self):
|
||||||
|
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
for tc in get_testcases():
|
for tc in get_testcases():
|
||||||
url = tc['url']
|
url = tc['url']
|
||||||
for ie in ies:
|
for ie in ies:
|
||||||
if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
|
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
||||||
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
|
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
|
||||||
else:
|
else:
|
||||||
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ from .utils import (
|
|||||||
encodeFilename,
|
encodeFilename,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
formatSeconds,
|
||||||
get_term_width,
|
get_term_width,
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
@@ -94,6 +95,7 @@ class YoutubeDL(object):
|
|||||||
forcethumbnail: Force printing thumbnail URL.
|
forcethumbnail: Force printing thumbnail URL.
|
||||||
forcedescription: Force printing description.
|
forcedescription: Force printing description.
|
||||||
forcefilename: Force printing final filename.
|
forcefilename: Force printing final filename.
|
||||||
|
forceduration: Force printing duration.
|
||||||
forcejson: Force printing info_dict as JSON.
|
forcejson: Force printing info_dict as JSON.
|
||||||
simulate: Do not download the video files.
|
simulate: Do not download the video files.
|
||||||
format: Video format code.
|
format: Video format code.
|
||||||
@@ -127,7 +129,16 @@ class YoutubeDL(object):
|
|||||||
noplaylist: Download single video instead of a playlist if in doubt.
|
noplaylist: Download single video instead of a playlist if in doubt.
|
||||||
age_limit: An integer representing the user's age in years.
|
age_limit: An integer representing the user's age in years.
|
||||||
Unsuitable videos for the given age are skipped.
|
Unsuitable videos for the given age are skipped.
|
||||||
download_archive: File name of a file where all downloads are recorded.
|
min_views: An integer representing the minimum view count the video
|
||||||
|
must have in order to not be skipped.
|
||||||
|
Videos without view count information are always
|
||||||
|
downloaded. None for no limit.
|
||||||
|
max_views: An integer representing the maximum view count.
|
||||||
|
Videos that are more popular than that are not
|
||||||
|
downloaded.
|
||||||
|
Videos without view count information are always
|
||||||
|
downloaded. None for no limit.
|
||||||
|
download_archive: File name of a file where all downloads are recorded.
|
||||||
Videos already present in the file are not downloaded
|
Videos already present in the file are not downloaded
|
||||||
again.
|
again.
|
||||||
cookiefile: File name where cookies should be read from and dumped to.
|
cookiefile: File name where cookies should be read from and dumped to.
|
||||||
@@ -355,22 +366,6 @@ class YoutubeDL(object):
|
|||||||
error_message = u'%s %s' % (_msg_header, message)
|
error_message = u'%s %s' % (_msg_header, message)
|
||||||
self.trouble(error_message, tb)
|
self.trouble(error_message, tb)
|
||||||
|
|
||||||
def report_writedescription(self, descfn):
|
|
||||||
""" Report that the description file is being written """
|
|
||||||
self.to_screen(u'[info] Writing video description to: ' + descfn)
|
|
||||||
|
|
||||||
def report_writesubtitles(self, sub_filename):
|
|
||||||
""" Report that the subtitles file is being written """
|
|
||||||
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
|
|
||||||
|
|
||||||
def report_writeinfojson(self, infofn):
|
|
||||||
""" Report that the metadata file has been written """
|
|
||||||
self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
|
|
||||||
|
|
||||||
def report_writeannotations(self, annofn):
|
|
||||||
""" Report that the annotations file has been written. """
|
|
||||||
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
|
|
||||||
|
|
||||||
def report_file_already_downloaded(self, file_name):
|
def report_file_already_downloaded(self, file_name):
|
||||||
"""Report file has already been fully downloaded."""
|
"""Report file has already been fully downloaded."""
|
||||||
try:
|
try:
|
||||||
@@ -415,13 +410,14 @@ class YoutubeDL(object):
|
|||||||
def _match_entry(self, info_dict):
|
def _match_entry(self, info_dict):
|
||||||
""" Returns None iff the file should be downloaded """
|
""" Returns None iff the file should be downloaded """
|
||||||
|
|
||||||
|
video_title = info_dict.get('title', info_dict.get('id', u'video'))
|
||||||
if 'title' in info_dict:
|
if 'title' in info_dict:
|
||||||
# This can happen when we're just evaluating the playlist
|
# This can happen when we're just evaluating the playlist
|
||||||
title = info_dict['title']
|
title = info_dict['title']
|
||||||
matchtitle = self.params.get('matchtitle', False)
|
matchtitle = self.params.get('matchtitle', False)
|
||||||
if matchtitle:
|
if matchtitle:
|
||||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||||
rejecttitle = self.params.get('rejecttitle', False)
|
rejecttitle = self.params.get('rejecttitle', False)
|
||||||
if rejecttitle:
|
if rejecttitle:
|
||||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||||
@@ -430,14 +426,21 @@ class YoutubeDL(object):
|
|||||||
if date is not None:
|
if date is not None:
|
||||||
dateRange = self.params.get('daterange', DateRange())
|
dateRange = self.params.get('daterange', DateRange())
|
||||||
if date not in dateRange:
|
if date not in dateRange:
|
||||||
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||||
|
view_count = info_dict.get('view_count', None)
|
||||||
|
if view_count is not None:
|
||||||
|
min_views = self.params.get('min_views')
|
||||||
|
if min_views is not None and view_count < min_views:
|
||||||
|
return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
|
||||||
|
max_views = self.params.get('max_views')
|
||||||
|
if max_views is not None and view_count > max_views:
|
||||||
|
return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||||
age_limit = self.params.get('age_limit')
|
age_limit = self.params.get('age_limit')
|
||||||
if age_limit is not None:
|
if age_limit is not None:
|
||||||
if age_limit < info_dict.get('age_limit', 0):
|
if age_limit < info_dict.get('age_limit', 0):
|
||||||
return u'Skipping "' + title + '" because it is age restricted'
|
return u'Skipping "' + title + '" because it is age restricted'
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return (u'%s has already been recorded in archive'
|
return u'%s has already been recorded in archive' % video_title
|
||||||
% info_dict.get('title', info_dict.get('id', u'video')))
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -554,16 +557,16 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
n_all_entries = len(ie_result['entries'])
|
n_all_entries = len(ie_result['entries'])
|
||||||
playliststart = self.params.get('playliststart', 1) - 1
|
playliststart = self.params.get('playliststart', 1) - 1
|
||||||
playlistend = self.params.get('playlistend', -1)
|
playlistend = self.params.get('playlistend', None)
|
||||||
|
# For backwards compatibility, interpret -1 as whole list
|
||||||
if playlistend == -1:
|
if playlistend == -1:
|
||||||
entries = ie_result['entries'][playliststart:]
|
playlistend = None
|
||||||
else:
|
|
||||||
entries = ie_result['entries'][playliststart:playlistend]
|
|
||||||
|
|
||||||
|
entries = ie_result['entries'][playliststart:playlistend]
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
|
|
||||||
self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
self.to_screen(
|
||||||
|
u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
||||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||||
|
|
||||||
for i, entry in enumerate(entries, 1):
|
for i, entry in enumerate(entries, 1):
|
||||||
@@ -748,6 +751,8 @@ class YoutubeDL(object):
|
|||||||
self.to_stdout(info_dict['description'])
|
self.to_stdout(info_dict['description'])
|
||||||
if self.params.get('forcefilename', False) and filename is not None:
|
if self.params.get('forcefilename', False) and filename is not None:
|
||||||
self.to_stdout(filename)
|
self.to_stdout(filename)
|
||||||
|
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
||||||
|
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||||
if self.params.get('forceformat', False):
|
if self.params.get('forceformat', False):
|
||||||
self.to_stdout(info_dict['format'])
|
self.to_stdout(info_dict['format'])
|
||||||
if self.params.get('forcejson', False):
|
if self.params.get('forcejson', False):
|
||||||
@@ -770,28 +775,34 @@ class YoutubeDL(object):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writedescription', False):
|
if self.params.get('writedescription', False):
|
||||||
try:
|
descfn = filename + u'.description'
|
||||||
descfn = filename + u'.description'
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
|
||||||
self.report_writedescription(descfn)
|
self.to_screen(u'[info] Video description is already present')
|
||||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
else:
|
||||||
descfile.write(info_dict['description'])
|
try:
|
||||||
except (KeyError, TypeError):
|
self.to_screen(u'[info] Writing video description to: ' + descfn)
|
||||||
self.report_warning(u'There\'s no description to write.')
|
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||||
except (OSError, IOError):
|
descfile.write(info_dict['description'])
|
||||||
self.report_error(u'Cannot write description file ' + descfn)
|
except (KeyError, TypeError):
|
||||||
return
|
self.report_warning(u'There\'s no description to write.')
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error(u'Cannot write description file ' + descfn)
|
||||||
|
return
|
||||||
|
|
||||||
if self.params.get('writeannotations', False):
|
if self.params.get('writeannotations', False):
|
||||||
try:
|
annofn = filename + u'.annotations.xml'
|
||||||
annofn = filename + u'.annotations.xml'
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
|
||||||
self.report_writeannotations(annofn)
|
self.to_screen(u'[info] Video annotations are already present')
|
||||||
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
else:
|
||||||
annofile.write(info_dict['annotations'])
|
try:
|
||||||
except (KeyError, TypeError):
|
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
|
||||||
self.report_warning(u'There are no annotations to write.')
|
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
||||||
except (OSError, IOError):
|
annofile.write(info_dict['annotations'])
|
||||||
self.report_error(u'Cannot write annotations file: ' + annofn)
|
except (KeyError, TypeError):
|
||||||
return
|
self.report_warning(u'There are no annotations to write.')
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error(u'Cannot write annotations file: ' + annofn)
|
||||||
|
return
|
||||||
|
|
||||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||||
self.params.get('writeautomaticsub')])
|
self.params.get('writeautomaticsub')])
|
||||||
@@ -807,38 +818,48 @@ class YoutubeDL(object):
|
|||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||||
self.report_writesubtitles(sub_filename)
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||||
subfile.write(sub)
|
else:
|
||||||
|
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
|
||||||
|
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||||
|
subfile.write(sub)
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(u'Cannot write subtitles file ' + descfn)
|
self.report_error(u'Cannot write subtitles file ' + descfn)
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writeinfojson', False):
|
if self.params.get('writeinfojson', False):
|
||||||
infofn = os.path.splitext(filename)[0] + u'.info.json'
|
infofn = os.path.splitext(filename)[0] + u'.info.json'
|
||||||
self.report_writeinfojson(infofn)
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
|
||||||
try:
|
self.to_screen(u'[info] Video description metadata is already present')
|
||||||
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
else:
|
||||||
write_json_file(json_info_dict, encodeFilename(infofn))
|
self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
|
||||||
except (OSError, IOError):
|
try:
|
||||||
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
|
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
||||||
return
|
write_json_file(json_info_dict, encodeFilename(infofn))
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
|
||||||
|
return
|
||||||
|
|
||||||
if self.params.get('writethumbnail', False):
|
if self.params.get('writethumbnail', False):
|
||||||
if info_dict.get('thumbnail') is not None:
|
if info_dict.get('thumbnail') is not None:
|
||||||
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
|
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
|
||||||
thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
|
thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
|
||||||
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||||
(info_dict['extractor'], info_dict['id']))
|
self.to_screen(u'[%s] %s: Thumbnail is already present' %
|
||||||
try:
|
(info_dict['extractor'], info_dict['id']))
|
||||||
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
|
else:
|
||||||
with open(thumb_filename, 'wb') as thumbf:
|
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
|
||||||
shutil.copyfileobj(uf, thumbf)
|
(info_dict['extractor'], info_dict['id']))
|
||||||
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
|
try:
|
||||||
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
with open(thumb_filename, 'wb') as thumbf:
|
||||||
self.report_warning(u'Unable to download thumbnail "%s": %s' %
|
shutil.copyfileobj(uf, thumbf)
|
||||||
(info_dict['thumbnail'], compat_str(err)))
|
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
|
||||||
|
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
||||||
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
|
self.report_warning(u'Unable to download thumbnail "%s": %s' %
|
||||||
|
(info_dict['thumbnail'], compat_str(err)))
|
||||||
|
|
||||||
if not self.params.get('skip_download', False):
|
if not self.params.get('skip_download', False):
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ __authors__ = (
|
|||||||
'Anton Larionov',
|
'Anton Larionov',
|
||||||
'Takuya Tsuchida',
|
'Takuya Tsuchida',
|
||||||
'Sergey M.',
|
'Sergey M.',
|
||||||
|
'Michael Orlitzky',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
@@ -62,6 +63,7 @@ from .utils import (
|
|||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
|
setproctitle,
|
||||||
std_headers,
|
std_headers,
|
||||||
write_string,
|
write_string,
|
||||||
)
|
)
|
||||||
@@ -196,10 +198,14 @@ def parseOpts(overrideArguments=None):
|
|||||||
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
|
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
|
||||||
|
|
||||||
|
|
||||||
selection.add_option('--playlist-start',
|
selection.add_option(
|
||||||
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
|
'--playlist-start',
|
||||||
selection.add_option('--playlist-end',
|
dest='playliststart', metavar='NUMBER', default=1, type=int,
|
||||||
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
help='playlist video to start at (default is %default)')
|
||||||
|
selection.add_option(
|
||||||
|
'--playlist-end',
|
||||||
|
dest='playlistend', metavar='NUMBER', default=None, type=int,
|
||||||
|
help='playlist video to end at (default is last)')
|
||||||
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--max-downloads', metavar='NUMBER',
|
selection.add_option('--max-downloads', metavar='NUMBER',
|
||||||
@@ -210,6 +216,14 @@ def parseOpts(overrideArguments=None):
|
|||||||
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
||||||
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
||||||
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
||||||
|
selection.add_option(
|
||||||
|
'--min-views', metavar='COUNT', dest='min_views',
|
||||||
|
default=None, type=int,
|
||||||
|
help="Do not download any videos with less than COUNT views",)
|
||||||
|
selection.add_option(
|
||||||
|
'--max-views', metavar='COUNT', dest='max_views',
|
||||||
|
default=None, type=int,
|
||||||
|
help="Do not download any videos with more than COUNT views",)
|
||||||
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
|
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
|
||||||
selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
|
selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
|
||||||
help='download only videos suitable for the given age',
|
help='download only videos suitable for the given age',
|
||||||
@@ -290,6 +304,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
verbosity.add_option('--get-description',
|
verbosity.add_option('--get-description',
|
||||||
action='store_true', dest='getdescription',
|
action='store_true', dest='getdescription',
|
||||||
help='simulate, quiet but print video description', default=False)
|
help='simulate, quiet but print video description', default=False)
|
||||||
|
verbosity.add_option('--get-duration',
|
||||||
|
action='store_true', dest='getduration',
|
||||||
|
help='simulate, quiet but print video length', default=False)
|
||||||
verbosity.add_option('--get-filename',
|
verbosity.add_option('--get-filename',
|
||||||
action='store_true', dest='getfilename',
|
action='store_true', dest='getfilename',
|
||||||
help='simulate, quiet but print output filename', default=False)
|
help='simulate, quiet but print output filename', default=False)
|
||||||
@@ -460,12 +477,15 @@ def parseOpts(overrideArguments=None):
|
|||||||
|
|
||||||
return parser, opts, args
|
return parser, opts, args
|
||||||
|
|
||||||
|
|
||||||
def _real_main(argv=None):
|
def _real_main(argv=None):
|
||||||
# Compatibility fixes for Windows
|
# Compatibility fixes for Windows
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
# https://github.com/rg3/youtube-dl/issues/820
|
# https://github.com/rg3/youtube-dl/issues/820
|
||||||
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
||||||
|
|
||||||
|
setproctitle(u'youtube-dl')
|
||||||
|
|
||||||
parser, opts, args = parseOpts(argv)
|
parser, opts, args = parseOpts(argv)
|
||||||
|
|
||||||
# Set user agent
|
# Set user agent
|
||||||
@@ -560,18 +580,10 @@ def _real_main(argv=None):
|
|||||||
if numeric_buffersize is None:
|
if numeric_buffersize is None:
|
||||||
parser.error(u'invalid buffer size specified')
|
parser.error(u'invalid buffer size specified')
|
||||||
opts.buffersize = numeric_buffersize
|
opts.buffersize = numeric_buffersize
|
||||||
try:
|
if opts.playliststart <= 0:
|
||||||
opts.playliststart = int(opts.playliststart)
|
raise ValueError(u'Playlist start must be positive')
|
||||||
if opts.playliststart <= 0:
|
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
|
||||||
raise ValueError(u'Playlist start must be positive')
|
raise ValueError(u'Playlist end must be greater than playlist start')
|
||||||
except (TypeError, ValueError):
|
|
||||||
parser.error(u'invalid playlist start number specified')
|
|
||||||
try:
|
|
||||||
opts.playlistend = int(opts.playlistend)
|
|
||||||
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
|
||||||
raise ValueError(u'Playlist end must be greater than playlist start')
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
parser.error(u'invalid playlist end number specified')
|
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||||
parser.error(u'invalid audio format specified')
|
parser.error(u'invalid audio format specified')
|
||||||
@@ -604,27 +616,30 @@ def _real_main(argv=None):
|
|||||||
or (opts.useid and u'%(id)s.%(ext)s')
|
or (opts.useid and u'%(id)s.%(ext)s')
|
||||||
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
|
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
|
||||||
or u'%(title)s-%(id)s.%(ext)s')
|
or u'%(title)s-%(id)s.%(ext)s')
|
||||||
if '%(ext)s' not in outtmpl and opts.extractaudio:
|
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
||||||
parser.error(u'Cannot download a video and extract audio into the same'
|
parser.error(u'Cannot download a video and extract audio into the same'
|
||||||
u' file! Use "%%(ext)s" instead of %r' %
|
u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||||
determine_ext(outtmpl, u''))
|
u' template'.format(outtmpl))
|
||||||
|
|
||||||
|
any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson
|
||||||
|
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
'usenetrc': opts.usenetrc,
|
'usenetrc': opts.usenetrc,
|
||||||
'username': opts.username,
|
'username': opts.username,
|
||||||
'password': opts.password,
|
'password': opts.password,
|
||||||
'videopassword': opts.videopassword,
|
'videopassword': opts.videopassword,
|
||||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
'quiet': (opts.quiet or any_printing),
|
||||||
'forceurl': opts.geturl,
|
'forceurl': opts.geturl,
|
||||||
'forcetitle': opts.gettitle,
|
'forcetitle': opts.gettitle,
|
||||||
'forceid': opts.getid,
|
'forceid': opts.getid,
|
||||||
'forcethumbnail': opts.getthumbnail,
|
'forcethumbnail': opts.getthumbnail,
|
||||||
'forcedescription': opts.getdescription,
|
'forcedescription': opts.getdescription,
|
||||||
|
'forceduration': opts.getduration,
|
||||||
'forcefilename': opts.getfilename,
|
'forcefilename': opts.getfilename,
|
||||||
'forceformat': opts.getformat,
|
'forceformat': opts.getformat,
|
||||||
'forcejson': opts.dumpjson,
|
'forcejson': opts.dumpjson,
|
||||||
'simulate': opts.simulate,
|
'simulate': opts.simulate,
|
||||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
'skip_download': (opts.skip_download or opts.simulate or any_printing),
|
||||||
'format': opts.format,
|
'format': opts.format,
|
||||||
'format_limit': opts.format_limit,
|
'format_limit': opts.format_limit,
|
||||||
'listformats': opts.listformats,
|
'listformats': opts.listformats,
|
||||||
@@ -668,6 +683,8 @@ def _real_main(argv=None):
|
|||||||
'keepvideo': opts.keepvideo,
|
'keepvideo': opts.keepvideo,
|
||||||
'min_filesize': opts.min_filesize,
|
'min_filesize': opts.min_filesize,
|
||||||
'max_filesize': opts.max_filesize,
|
'max_filesize': opts.max_filesize,
|
||||||
|
'min_views': opts.min_views,
|
||||||
|
'max_views': opts.max_views,
|
||||||
'daterange': date,
|
'daterange': date,
|
||||||
'cachedir': opts.cachedir,
|
'cachedir': opts.cachedir,
|
||||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from .arte import (
|
|||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
|
from .blinkx import BlinkxIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
@@ -20,6 +21,8 @@ from .brightcove import BrightcoveIE
|
|||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
|
from .cbs import CBSIE
|
||||||
|
from .channel9 import Channel9IE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .clipsyndicate import ClipsyndicateIE
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
@@ -87,6 +90,7 @@ from .kickstarter import KickStarterIE
|
|||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import LivestreamIE, LivestreamOriginalIE
|
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||||
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
from .mit import TechTVMITIE, MITIE
|
from .mit import TechTVMITIE, MITIE
|
||||||
@@ -111,9 +115,11 @@ from .orf import ORFIE
|
|||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import PornHubIE
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .pyvideo import PyvideoIE
|
from .pyvideo import PyvideoIE
|
||||||
|
from .radiofrance import RadioFranceIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .ringtv import RingTVIE
|
from .ringtv import RingTVIE
|
||||||
|
|||||||
@@ -266,20 +266,6 @@ class ArteTVDDCIE(ArteTVPlus7IE):
|
|||||||
IE_NAME = u'arte.tv:ddc'
|
IE_NAME = u'arte.tv:ddc'
|
||||||
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
|
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
|
|
||||||
u'file': u'049881-009_PLUS7-D.flv',
|
|
||||||
u'info_dict': {
|
|
||||||
u'title': u'Mit offenen Karten',
|
|
||||||
u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
|
|
||||||
u'upload_date': u'20131207',
|
|
||||||
},
|
|
||||||
u'params': {
|
|
||||||
# rtmp download
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, lang = self._extract_url_info(url)
|
video_id, lang = self._extract_url_info(url)
|
||||||
if lang == 'folge':
|
if lang == 'folge':
|
||||||
|
|||||||
86
youtube_dl/extractor/blinkx.py
Normal file
86
youtube_dl/extractor/blinkx.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
remove_start,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BlinkxIE(InfoExtractor):
    """Information extractor for blinkx.com videos.

    Accepts http(s)://[www.]blinkx.com/ce/<id> page URLs as well as the
    short blinkx:<id> form. All metadata is read from the JSON answer of the
    play_video API call; the web page itself is never downloaded.
    """
    # "www." is optional: the API call below only needs the id, not the host.
    _VALID_URL = r'^(?:https?://(?:www\.)?blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)'
    # Must be IE_NAME (no leading underscore) to override the extractor name.
    IE_NAME = u'blinkx'

    _TEST = {
        u'url': u'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
        u'file': u'8aQUy7GV.mp4',
        u'md5': u'2e9a07364af40163a908edbf10bb2492',
        u'info_dict': {
            u"title": u"Police Car Rolls Away",
            u"uploader": u"stupidvideos.com",
            u"upload_date": u"20131215",
            u"description": u"A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
            u"duration": 14.886,
            u"thumbnails": [{
                "width": 100,
                "height": 76,
                "url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
            }],
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary for the blinkx video behind *url*.

        The returned 'id' is the first 8 characters of the id found in the
        URL; the complete id is kept under 'fullid'. Formats are sorted by
        (width, video bitrate, audio bitrate) in ascending order.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # The id embedded in the URL is very long; a short prefix is used as
        # the public id and for progress messages.
        display_id = video_id[:8]

        api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' +
                   u'video=%s' % video_id)
        data_json = self._download_webpage(api_url, display_id)
        data = json.loads(data_json)['api']['results'][0]
        # Interpret the epoch as UTC so that the resulting date does not
        # depend on the timezone of the machine running the extraction.
        dt = datetime.datetime.utcfromtimestamp(data['pubdate_epoch'])
        upload_date = dt.strftime('%Y%m%d')

        duration = None
        thumbnails = []
        formats = []
        # Each media entry is either a thumbnail ('jpg'), the description of
        # the original upload (carrying the duration) or a playable format.
        for media in data['media']:
            if media['type'] == 'jpg':
                thumbnails.append({
                    'url': media['link'],
                    'width': int(media['w']),
                    'height': int(media['h']),
                })
            elif media['type'] == 'original':
                duration = media['d']
            elif media['type'] in ('flv', 'mp4'):
                # Codec names are reported with an 'ff' prefix; drop it.
                vcodec = remove_start(media['vcodec'], 'ff')
                acodec = remove_start(media['acodec'], 'ff')
                # Bitrates are divided by 1000 before being exposed.
                format_id = (u'%s-%sk-%s' %
                             (vcodec,
                              (int(media['vbr']) + int(media['abr'])) // 1000,
                              media['w']))
                formats.append({
                    'format_id': format_id,
                    'url': media['link'],
                    'vcodec': vcodec,
                    'acodec': acodec,
                    'abr': int(media['abr']) // 1000,
                    'vbr': int(media['vbr']) // 1000,
                    'width': int(media['w']),
                    'height': int(media['h']),
                })
        formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr']))

        return {
            'id': display_id,
            'fullid': video_id,
            'title': data['title'],
            'formats': formats,
            'uploader': data['channel_name'],
            'upload_date': upload_date,
            'description': data.get('description'),
            'thumbnails': thumbnails,
            'duration': duration,
        }
|
||||||
30
youtube_dl/extractor/cbs.py
Normal file
30
youtube_dl/extractor/cbs.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class CBSIE(InfoExtractor):
    """Extractor for video pages below cbs.com/shows/<show>/video/<id>/.

    The page is only searched for the player's pid; the result is a URL
    result of the form theplatform:<pid>.
    """
    _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'

    _TEST = {
        u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
        u'file': u'4JUVEwq3wUT7.flv',
        u'info_dict': {
            u'title': u'Connect Chat feat. Garth Brooks',
            u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
            u'duration': 1495,
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Look up the pid embedded in the page and hand it on as a URL result."""
        page_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, page_id)
        # The pid is assigned in an inline script of the page.
        pid_pattern = r"video\.settings\.pid\s*=\s*'([^']+)';"
        platform_id = self._search_regex(pid_pattern, page, u'real video ID')
        return self.url_result(u'theplatform:%s' % platform_id)
|
||||||
267
youtube_dl/extractor/channel9.py
Normal file
267
youtube_dl/extractor/channel9.py
Normal file
@@ -0,0 +1,267 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
class Channel9IE(InfoExtractor):
    '''
    Common extractor for channel9.msdn.com.

    The type of the provided URL (video or playlist) is determined from the
    Search.PageType meta tag of the downloaded page rather than from the URL
    itself, because the URL alone does not always allow telling them apart.

    Metadata that cannot be found on the page (counts, rating, file size) is
    reported as None rather than 0, so that "unknown" is never mistaken for
    an actual value of zero.
    '''
    IE_DESC = u'Channel 9'
    IE_NAME = u'channel9'
    # The content path is matched non-greedily so that an optional trailing
    # slash does not end up in the id and in the RSS URL.
    _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)/?$'

    _TESTS = [
        {
            u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
            u'file': u'Events_TechEd_Australia_2013_KOS002.mp4',
            u'md5': u'bbd75296ba47916b754e73c3a4bbdf10',
            u'info_dict': {
                u'title': u'Developer Kick-Off Session: Stuff We Love',
                u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f',
                u'duration': 4576,
                u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
                u'session_code': u'KOS002',
                u'session_day': u'Day 1',
                u'session_room': u'Arena 1A',
                u'session_speakers': [u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen'],
            },
        },
        {
            u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
            u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
            u'md5': u'b43ee4529d111bc37ba7ee4f34813e68',
            u'info_dict': {
                u'title': u'Self-service BI with Power BI - nuclear testing',
                u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                u'duration': 1540,
                u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
                u'authors': [u'Mike Wilmot'],
            },
        }
    ]

    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'

    # Sorted by quality
    _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']

    # Size units in ascending order; the list index is the power of 1024.
    _SIZE_UNITS = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB']

    def _restore_bytes(self, formatted_size):
        """Convert a displayed size such as u'12.5 MB' into a byte count.

        Returns None if the size is missing, does not look like
        "<number> <units>" or uses an unknown unit.
        """
        if not formatted_size:
            return None
        m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
        if not m:
            return None
        try:
            exponent = self._SIZE_UNITS.index(m.group('units').upper())
        except ValueError:
            return None
        return int(float(m.group('size')) * (1024 ** exponent))

    def _formats_from_html(self, html):
        """Return the known download formats found in html, worst first."""
        # The (?x) flag has to be the very first thing in the pattern: global
        # inline flags anywhere else are rejected by newer versions of re.
        FORMAT_REGEX = r'''(?x)
            <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
            <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
            (?:<div\s+class="popup\s+rounded">\s*
            <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
            </div>)?                                                # File size part may be missing
        '''
        # Extract known formats
        formats = [{
            'url': x.group('url'),
            'format_id': x.group('quality'),
            'format_note': x.group('note'),
            'format': '%s (%s)' % (x.group('quality'), x.group('note')),
            'filesize': self._restore_bytes(x.group('filesize')),  # File size is approximate
        } for x in re.finditer(FORMAT_REGEX, html) if x.group('quality') in self._known_formats]
        # Sort according to known formats list
        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
        return formats

    def _extract_title(self, html):
        """Return the page title without the u' (Channel 9)' suffix."""
        title = self._html_search_meta(u'title', html, u'title')
        if title is None:
            title = self._og_search_title(html)
        TITLE_SUFFIX = u' (Channel 9)'
        if title is not None and title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]
        return title

    def _extract_description(self, html):
        """Return the entry body, falling back to the description meta tag."""
        DESCRIPTION_REGEX = r'''(?sx)
            <div\s+class="entry-content">\s*
            <div\s+id="entry-body">\s*
            (?P<description>.+?)\s*
            </div>\s*
            </div>
        '''
        m = re.search(DESCRIPTION_REGEX, html)
        if m is not None:
            return m.group('description')
        return self._html_search_meta(u'description', html, u'description')

    def _extract_duration(self, html):
        """Return the duration in seconds parsed from HH:MM:SS, or None."""
        m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
        if m is None:
            return None
        hours = int(m.group('hours'))
        minutes = int(m.group('minutes'))
        seconds = int(m.group('seconds'))
        return (hours * 60 * 60) + (minutes * 60) + seconds

    def _extract_slides(self, html):
        """Return the URL of the "Slides" link, or None if there is none."""
        m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
        return m.group('slidesurl') if m is not None else None

    def _extract_zip(self, html):
        """Return the URL of the "Zip" link, or None if there is none."""
        m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
        return m.group('zipurl') if m is not None else None

    def _extract_avg_rating(self, html):
        """Return the average rating as a float, or None if not shown."""
        m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
        return float(m.group('avgrating')) if m is not None else None

    def _extract_rating_count(self, html):
        """Return the number of ratings, or None if not shown."""
        m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
        return self._fix_count(m.group('ratingcount')) if m is not None else None

    def _extract_view_count(self, html):
        """Return the number of views, or None if not shown."""
        m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
        return self._fix_count(m.group('viewcount')) if m is not None else None

    def _extract_comment_count(self, html):
        """Return the number of comments, or None if not shown."""
        m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
        return self._fix_count(m.group('commentcount')) if m is not None else None

    def _fix_count(self, count):
        """Turn a count with thousands separators (u'1,234') into an int."""
        return int(str(count).replace(',', '')) if count is not None else None

    def _extract_authors(self, html):
        """Return the author names of the first author item, or None."""
        m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
        if m is None:
            return None
        return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))

    def _extract_session_code(self, html):
        """Return the session code, or None if the page has none."""
        m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
        return m.group('code') if m is not None else None

    def _extract_session_day(self, html):
        """Return the session day label, or None if the page has none."""
        m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
        return m.group('day') if m is not None else None

    def _extract_session_room(self, html):
        """Return the session room, or None if the page has none."""
        m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
        return m.group('room') if m is not None else None

    def _extract_session_speakers(self, html):
        """Return the list of speaker names linked on the page."""
        return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)

    def _extract_content(self, html, content_path):
        """Build the result entries for one content page.

        Returns a list with up to three video dictionaries (slides, zip and
        recording, in that order) sharing the same metadata, or None, after
        a warning, when the page offers nothing to download.
        """
        # Look for downloadable content
        formats = self._formats_from_html(html)
        slides = self._extract_slides(html)
        zip_ = self._extract_zip(html)

        # Nothing to download
        if not formats and slides is None and zip_ is None:
            self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path)
            return

        # Extract meta
        title = self._extract_title(html)
        description = self._extract_description(html)
        thumbnail = self._og_search_thumbnail(html)
        duration = self._extract_duration(html)
        avg_rating = self._extract_avg_rating(html)
        rating_count = self._extract_rating_count(html)
        view_count = self._extract_view_count(html)
        comment_count = self._extract_comment_count(html)

        common = {
            '_type': 'video',
            'id': content_path,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'avg_rating': avg_rating,
            'rating_count': rating_count,
            'view_count': view_count,
            'comment_count': comment_count,
        }

        result = []

        if slides is not None:
            d = common.copy()
            d.update({'title': title + '-Slides', 'url': slides})
            result.append(d)

        if zip_ is not None:
            d = common.copy()
            d.update({'title': title + '-Zip', 'url': zip_})
            result.append(d)

        if formats:
            d = common.copy()
            d.update({'title': title, 'formats': formats})
            result.append(d)

        return result

    def _extract_entry_item(self, html, content_path):
        """Extract an 'Entry.Item' page, adding the authors to each entry."""
        contents = self._extract_content(html, content_path)
        if contents is None:
            return contents

        authors = self._extract_authors(html)

        for content in contents:
            content['authors'] = authors

        return contents

    def _extract_session(self, html, content_path):
        """Extract a 'Session' page, adding session details to each entry."""
        contents = self._extract_content(html, content_path)
        if contents is None:
            return contents

        session_meta = {
            'session_code': self._extract_session_code(html),
            'session_day': self._extract_session_day(html),
            'session_room': self._extract_session_room(html),
            'session_speakers': self._extract_session_speakers(html),
        }

        for content in contents:
            content.update(session_meta)

        return contents

    def _extract_list(self, content_path):
        """Return a playlist built from the item links of the RSS feed."""
        rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
        entries = [self.url_result(session_url.text, 'Channel9')
                   for session_url in rss.findall('./channel/item/link')]
        title_text = rss.find('./channel/title').text
        return self.playlist_result(entries, content_path, title_text)

    def _real_extract(self, url):
        """Download the page and dispatch on its Search.PageType meta tag.

        Raises ExtractorError (marked as expected) when the meta tag is
        missing or names a page type that is not handled here.
        """
        mobj = re.match(self._VALID_URL, url)
        content_path = mobj.group('contentpath')

        webpage = self._download_webpage(url, content_path, u'Downloading web page')

        page_type_m = re.search(r'<meta name="Search\.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
        if page_type_m is None:
            raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True)

        page_type = page_type_m.group('pagetype')
        if page_type == 'List':         # List page, may contain list of 'item'-like objects
            return self._extract_list(content_path)
        elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content
            return self._extract_entry_item(webpage, content_path)
        elif page_type == 'Session':    # Event session page, may contain downloadable content
            return self._extract_session(webpage, content_path)
        else:
            raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True)
|
||||||
@@ -34,15 +34,39 @@ class InfoExtractor(object):
|
|||||||
The dictionaries must include the following fields:
|
The dictionaries must include the following fields:
|
||||||
|
|
||||||
id: Video identifier.
|
id: Video identifier.
|
||||||
url: Final video URL.
|
|
||||||
title: Video title, unescaped.
|
title: Video title, unescaped.
|
||||||
ext: Video filename extension.
|
|
||||||
|
|
||||||
Instead of url and ext, formats can also specified.
|
Additionally, it must contain either a formats entry or url and ext:
|
||||||
|
|
||||||
|
formats: A list of dictionaries for each format available, it must
|
||||||
|
be ordered from worst to best quality. Potential fields:
|
||||||
|
* url Mandatory. The URL of the video file
|
||||||
|
* ext Will be calculated from url if missing
|
||||||
|
* format A human-readable description of the format
|
||||||
|
("mp4 container with h264/opus").
|
||||||
|
Calculated from the format_id, width, height.
|
||||||
|
and format_note fields if missing.
|
||||||
|
* format_id A short description of the format
|
||||||
|
("mp4_h264_opus" or "19")
|
||||||
|
* format_note Additional info about the format
|
||||||
|
("3D" or "DASH video")
|
||||||
|
* width Width of the video, if known
|
||||||
|
* height Height of the video, if known
|
||||||
|
* abr Average audio bitrate in KBit/s
|
||||||
|
* acodec Name of the audio codec in use
|
||||||
|
* vbr Average video bitrate in KBit/s
|
||||||
|
* vcodec Name of the video codec in use
|
||||||
|
* filesize The number of bytes, if known in advance
|
||||||
|
* player_url SWF Player URL (used for rtmpdump).
|
||||||
|
url: Final video URL.
|
||||||
|
ext: Video filename extension.
|
||||||
|
format: The video format, defaults to ext (used for --get-format)
|
||||||
|
player_url: SWF Player URL (used for rtmpdump).
|
||||||
|
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||||
|
like returned by urllib.request.urlopen
|
||||||
|
|
||||||
The following fields are optional:
|
The following fields are optional:
|
||||||
|
|
||||||
format: The video format, defaults to ext (used for --get-format)
|
|
||||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||||
"url") for the varying thumbnails
|
"url") for the varying thumbnails
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
@@ -51,35 +75,14 @@ class InfoExtractor(object):
|
|||||||
upload_date: Video upload date (YYYYMMDD).
|
upload_date: Video upload date (YYYYMMDD).
|
||||||
uploader_id: Nickname or id of the video uploader.
|
uploader_id: Nickname or id of the video uploader.
|
||||||
location: Physical location of the video.
|
location: Physical location of the video.
|
||||||
player_url: SWF Player URL (used for rtmpdump).
|
|
||||||
subtitles: The subtitle file contents as a dictionary in the format
|
subtitles: The subtitle file contents as a dictionary in the format
|
||||||
{language: subtitles}.
|
{language: subtitles}.
|
||||||
|
duration: Length of the video in seconds, as an integer.
|
||||||
view_count: How many users have watched the video on the platform.
|
view_count: How many users have watched the video on the platform.
|
||||||
like_count: Number of positive ratings of the video
|
like_count: Number of positive ratings of the video
|
||||||
dislike_count: Number of negative ratings of the video
|
dislike_count: Number of negative ratings of the video
|
||||||
comment_count: Number of comments on the video
|
comment_count: Number of comments on the video
|
||||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
|
||||||
like returned by urllib.request.urlopen
|
|
||||||
age_limit: Age restriction for the video, as an integer (years)
|
age_limit: Age restriction for the video, as an integer (years)
|
||||||
formats: A list of dictionaries for each format available, it must
|
|
||||||
be ordered from worst to best quality. Potential fields:
|
|
||||||
* url Mandatory. The URL of the video file
|
|
||||||
* ext Will be calculated from url if missing
|
|
||||||
* format A human-readable description of the format
|
|
||||||
("mp4 container with h264/opus").
|
|
||||||
Calculated from the format_id, width, height.
|
|
||||||
and format_note fields if missing.
|
|
||||||
* format_id A short description of the format
|
|
||||||
("mp4_h264_opus" or "19")
|
|
||||||
* format_note Additional info about the format
|
|
||||||
("3D" or "DASH video")
|
|
||||||
* width Width of the video, if known
|
|
||||||
* height Height of the video, if known
|
|
||||||
* abr Average audio bitrate in KBit/s
|
|
||||||
* acodec Name of the audio codec in use
|
|
||||||
* vbr Average video bitrate in KBit/s
|
|
||||||
* vcodec Name of the video codec in use
|
|
||||||
* filesize The number of bytes, if known in advance
|
|
||||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||||
should allow to get the same result again. (It will be set
|
should allow to get the same result again. (It will be set
|
||||||
by YoutubeDL if it's missing)
|
by YoutubeDL if it's missing)
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||||
"""Information Extractor for Dailymotion"""
|
"""Information Extractor for Dailymotion"""
|
||||||
|
|
||||||
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
|
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||||
IE_NAME = u'dailymotion'
|
IE_NAME = u'dailymotion'
|
||||||
|
|
||||||
_FORMATS = [
|
_FORMATS = [
|
||||||
@@ -81,7 +81,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
# Extract id and simplified title from URL
|
# Extract id and simplified title from URL
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
video_id = mobj.group(1).split('_')[0].split('?')[0]
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
url = 'http://www.dailymotion.com/video/%s' % video_id
|
url = 'http://www.dailymotion.com/video/%s' % video_id
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class DaumIE(InfoExtractor):
|
class DaumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
||||||
IE_NAME = u'daum.net'
|
IE_NAME = u'daum.net'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from ..utils import (
|
|||||||
class FacebookIE(InfoExtractor):
|
class FacebookIE(InfoExtractor):
|
||||||
"""Information Extractor for Facebook"""
|
"""Information Extractor for Facebook"""
|
||||||
|
|
||||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
||||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||||
_NETRC_MACHINE = 'facebook'
|
_NETRC_MACHINE = 'facebook'
|
||||||
@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
u'file': u'120708114770723.mp4',
|
u'file': u'120708114770723.mp4',
|
||||||
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
|
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"duration": 279,
|
u"duration": 279,
|
||||||
u"title": u"PEOPLE ARE AWESOME 2013"
|
u"title": u"PEOPLE ARE AWESOME 2013"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -222,6 +222,18 @@ class GenericIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Look for embedded blip.tv player
|
||||||
|
mobj = re.search(r'<meta\s[^>]*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||||
|
if mobj:
|
||||||
|
return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV')
|
||||||
|
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage)
|
||||||
|
if mobj:
|
||||||
|
player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1)
|
||||||
|
player_page = self._download_webpage(player_url, mobj.group(1))
|
||||||
|
blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False)
|
||||||
|
if blip_video_id:
|
||||||
|
return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV')
|
||||||
|
|
||||||
# Look for Bandcamp pages with custom domain
|
# Look for Bandcamp pages with custom domain
|
||||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
@@ -229,6 +241,12 @@ class GenericIE(InfoExtractor):
|
|||||||
# Don't set the extractor because it can be a track url or an album
|
# Don't set the extractor because it can be a track url or an album
|
||||||
return self.url_result(burl)
|
return self.url_result(burl)
|
||||||
|
|
||||||
|
# Look for embedded Vevo player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ class IGNIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
|
u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion',
|
u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
|
||||||
u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
|
u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
78
youtube_dl/extractor/mdr.py
Normal file
78
youtube_dl/extractor/mdr.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MDRIE(InfoExtractor):
    """Extractor for video and audio pages of the mdr.de Mediathek.

    The page URL yields the site prefix and the media id; the page itself
    yields the title and the path of an ``-avCustom.xml`` document that
    lists the available assets (one format per asset).
    """

    _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'

    _TESTS = [{
        u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
        u'file': u'165624.mp4',
        u'md5': u'ae785f36ecbf2f19b42edf1bc9c85815',
        u'info_dict': {
            u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
        },
    },
    {
        u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
        u'file': u'718370.mp3',
        u'md5': u'a9d21345a234c7b45dee612f290fd8d7',
        u'info_dict': {
            u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, formats) for an mdr.de media page.

        Raises ExtractorError when the asset XML contains no asset with a
        progressive download URL.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('video_id')
        domain = m.group('domain')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
        xmlurl = self._search_regex(
            r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL')

        doc = self._download_xml(domain + xmlurl, video_id)
        formats = []
        for asset in doc.findall('./assets/asset'):
            url_el = asset.find('.//progressiveDownloadUrl')
            if url_el is None:
                # Assets without a progressive download URL are ignored
                continue

            # Bitrates are divided by 1000 before being stored
            abr = int(asset.find('bitrateAudio').text) // 1000
            media_type = asset.find('mediaType').text
            fmt = {
                'abr': abr,
                'filesize': int(asset.find('fileSize').text),
                'url': url_el.text,
            }

            vbr_el = asset.find('bitrateVideo')
            if vbr_el is None:
                # No video bitrate element: treat the asset as audio-only
                fmt.update({
                    'vcodec': 'none',
                    'format_id': u'%s-%d' % (media_type, abr),
                })
            else:
                vbr = int(vbr_el.text) // 1000
                fmt.update({
                    'vbr': vbr,
                    'width': int(asset.find('frameWidth').text),
                    'height': int(asset.find('frameHeight').text),
                    'format_id': u'%s-%d' % (media_type, vbr),
                })
            formats.append(fmt)

        if not formats:
            raise ExtractorError(u'Could not find any valid formats')

        # Audio-only formats carry no 'vbr'.  Use -1 instead of None so they
        # still sort before every video format, without comparing None to an
        # int (which raises TypeError on Python 3).
        formats.sort(key=lambda f: (f.get('vbr', -1), f['abr']))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
|
||||||
@@ -9,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NaverIE(InfoExtractor):
|
class NaverIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://tvcast.naver.com/v/81652',
|
u'url': u'http://tvcast.naver.com/v/81652',
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import month_by_name
|
from ..utils import month_by_name
|
||||||
|
|||||||
38
youtube_dl/extractor/pornhd.py
Normal file
38
youtube_dl/extractor/pornhd.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import compat_urllib_parse
|
||||||
|
|
||||||
|
|
||||||
|
class PornHdIE(InfoExtractor):
    """Extractor for pornhd.com video pages.

    The id and the title are both taken from the page URL; only the media
    URL is read from the downloaded page.
    """

    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
    _TEST = {
        u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
        u'file': u'1962.flv',
        u'md5': u'35272469887dca97abd30abecc6cdf75',
        u'info_dict': {
            u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for a single video page."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('video_id')

        webpage = self._download_webpage(url, video_id)

        # The media URL is embedded percent-encoded in the page
        encoded_url = self._html_search_regex(
            r'&hd=(http.+?)&', webpage, u'video URL')

        return {
            'id': video_id,
            'url': compat_urllib_parse.unquote(encoded_url),
            'ext': 'flv',
            'title': url_match.group('video_title'),
            'age_limit': 18,
        }
|
||||||
@@ -12,7 +12,7 @@ from ..aes import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class PornHubIE(InfoExtractor):
|
class PornHubIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||||
u'file': u'648719015.mp4',
|
u'file': u'648719015.mp4',
|
||||||
|
|||||||
60
youtube_dl/extractor/radiofrance.py
Normal file
60
youtube_dl/extractor/radiofrance.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
remove_start,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFranceIE(InfoExtractor):
    """Extractor for "radiovisions" pages on maison.radiofrance.fr."""

    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = u'radiofrance'

    _TEST = {
        u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
        u'file': u'one-one.mp4',
        u'md5': u'todo',
        u'info_dict': {
            u"title": u"One to one",
            u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            u"uploader": u"ferdi",
        },
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats, description, uploader)."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title')
        # Description and uploader are optional (fatal=False)
        description = self._html_search_regex(
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
            webpage, u'description', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="credit"> © (.*?)</div>',
            webpage, u'uploader', fatal=False)

        # The player's data-source attribute holds "name: 'url'" pairs
        formats_str = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
            webpage, u'audio URLs')

        # No sorting, we don't know any more about these formats
        formats = []
        for format_id, format_url in re.findall(
                r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str):
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'vcodec': 'none',
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
        }
|
||||||
@@ -3,6 +3,7 @@ import json
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -32,6 +33,17 @@ class ThePlatformIE(InfoExtractor):
|
|||||||
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
||||||
'format=smil&mbr=true'.format(video_id))
|
'format=smil&mbr=true'.format(video_id))
|
||||||
meta = self._download_xml(smil_url, video_id)
|
meta = self._download_xml(smil_url, video_id)
|
||||||
|
|
||||||
|
try:
|
||||||
|
error_msg = next(
|
||||||
|
n.attrib['abstract']
|
||||||
|
for n in meta.findall(_x('.//smil:ref'))
|
||||||
|
if n.attrib.get('title') == u'Geographic Restriction')
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise ExtractorError(error_msg, expected=True)
|
||||||
|
|
||||||
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
|
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
|
||||||
info_json = self._download_webpage(info_url, video_id)
|
info_json = self._download_webpage(info_url, video_id)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
|
|||||||
@@ -15,7 +15,11 @@ class VevoIE(InfoExtractor):
|
|||||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||||
(currently used by MTVIE)
|
(currently used by MTVIE)
|
||||||
"""
|
"""
|
||||||
_VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
|
||||||
|
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
|
||||||
|
vevo:)
|
||||||
|
(?P<id>[^&?#]+)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
u'file': u'GB1101300280.mp4',
|
u'file': u'GB1101300280.mp4',
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ class VideoPremiumIE(InfoExtractor):
|
|||||||
u'params': {
|
u'params': {
|
||||||
u'skip_download': True,
|
u'skip_download': True,
|
||||||
},
|
},
|
||||||
|
u'skip': u'Test file has been deleted.',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -1377,9 +1377,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
if 'length_seconds' not in video_info:
|
if 'length_seconds' not in video_info:
|
||||||
self._downloader.report_warning(u'unable to extract video duration')
|
self._downloader.report_warning(u'unable to extract video duration')
|
||||||
video_duration = ''
|
video_duration = None
|
||||||
else:
|
else:
|
||||||
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
|
video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
|
||||||
|
|
||||||
# annotations
|
# annotations
|
||||||
video_annotations = None
|
video_annotations = None
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import ctypes
|
||||||
import datetime
|
import datetime
|
||||||
import email.utils
|
import email.utils
|
||||||
import errno
|
import errno
|
||||||
@@ -1051,7 +1052,7 @@ def month_by_name(name):
|
|||||||
""" Return the number of a month by (locale-independently) English name """
|
""" Return the number of a month by (locale-independently) English name """
|
||||||
|
|
||||||
ENGLISH_NAMES = [
|
ENGLISH_NAMES = [
|
||||||
u'Januar', u'February', u'March', u'April', u'May', u'June',
|
u'January', u'February', u'March', u'April', u'May', u'June',
|
||||||
u'July', u'August', u'September', u'October', u'November', u'December']
|
u'July', u'August', u'September', u'October', u'November', u'December']
|
||||||
try:
|
try:
|
||||||
return ENGLISH_NAMES.index(name) + 1
|
return ENGLISH_NAMES.index(name) + 1
|
||||||
@@ -1062,3 +1063,24 @@ def month_by_name(name):
|
|||||||
def fix_xml_all_ampersand(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Every ampersand is escaped, including ones that already start an
    # entity, so this is only suitable for documents with bare '&' characters.
    return xml_str.replace(u'&', u'&amp;')
|
||||||
|
|
||||||
|
|
||||||
|
def setproctitle(title):
    """Set the name of the current process via libc's prctl, if possible.

    title must be a unicode string.  This is best effort: when
    "libc.so.6" cannot be loaded, or the loaded libc has no prctl symbol,
    the function returns without doing anything.
    """
    assert isinstance(title, type(u''))
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    # Size the buffer from the encoded bytes, not from the number of
    # characters: non-ASCII titles are longer once encoded as UTF-8.
    # create_string_buffer(bytes) also appends the terminating NUL.
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME in <linux/prctl.h>
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
|
||||||
|
|
||||||
|
|
||||||
|
def remove_start(s, start):
    """Return s without the leading start; s unchanged if it is not a prefix."""
    return s[len(start):] if s.startswith(start) else s
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.12.11.1'
|
__version__ = '2013.12.16.6'
|
||||||
|
|||||||
Reference in New Issue
Block a user