1
0
mirror of https://source.netsyms.com/Mirrors/youtube-dl synced 2026-04-25 01:27:16 +00:00

Compare commits

..

43 Commits

Author SHA1 Message Date
Philipp Hagemeister
57adeaea87 release 2013.05.23 2013-05-23 13:37:19 +02:00
Philipp Hagemeister
8f3f1aef05 Fix HowCast IE 2013-05-23 13:34:33 +02:00
Filippo Valsorda
51d2453c7a small tweaks 2013-05-21 16:07:27 +02:00
Jaime Marquínez Ferrándiz
45014296be Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00
Anna Bernardi
afef36c950 add support for Flickr videos - closes #261 2013-05-20 23:19:38 +02:00
Filippo Valsorda
b31756c18e Python 2 compat fixes for MyVideo.de rtmpdump downloads 2013-05-20 11:57:10 +02:00
Filippo Valsorda
f008688520 make rtmpdump inherit the verbose option for debugging 2013-05-20 11:54:21 +02:00
Filippo Valsorda
5b68ea215b Merge pull request #842 - myvideo, rtmp support
@dersphere code, from dersphere/plugin.video.myvideo_de.git
rewritten by @mc2avr
released in the Public Domain by the author
ref: https://github.com/rg3/youtube-dl/pull/842
2013-05-20 09:49:58 +02:00
Jaime Marquínez Ferrándiz
b1d568f0bc HowcastIE: extract thumbnail 2013-05-20 08:39:41 +02:00
Jaime Marquínez Ferrándiz
17bd1b2f41 VineIE: extract more information and minor style changes 2013-05-20 08:31:03 +02:00
Anna Bernardi
5b0d3cc0cd Add support for Vine - closes #845 2013-05-20 00:33:14 +02:00
Filippo Valsorda
d4f76f1674 Add support for Howcast.com - closes #835 2013-05-18 19:17:19 +02:00
Jaime Marquínez Ferrándiz
340fa21198 UstreamIE: get thumbnail and uploader name 2013-05-18 11:54:18 +02:00
mc2avr
de5d66d431 MyVideoIE: add rtmp support 2013-05-15 23:38:44 +02:00
Jaime Marquínez Ferrándiz
7bdb17d4d5 Add extra_info argument to extract_info and process_ie_result
It allows to update the info_dicts with other values

(closes #840)
2013-05-14 14:40:40 +02:00
Philipp Hagemeister
419c64b107 Throw a better error if the protocol is invalid 2013-05-13 19:54:07 +02:00
Philipp Hagemeister
99a5ae3f8e Simplify generic search IE (Closes #839) 2013-05-13 19:53:52 +02:00
Philipp Hagemeister
c7563c528b Merge remote-tracking branch 'jaimeMF/SearchIE' 2013-05-13 19:43:35 +02:00
Jaime Marquínez Ferrándiz
e30e9318da Add base class SearchInfoExtractor for search queries IEs 2013-05-13 14:58:44 +02:00
Philipp Hagemeister
5c51028d38 release 2013.05.14 2013-05-13 13:50:05 +02:00
Philipp Hagemeister
c1d58e1c67 Merge pull request #834 from chocolateboy/install_prefix_fix
only install to /etc if PREFIX is /usr or /usr/local
2013-05-13 00:42:24 -07:00
Philipp Hagemeister
02030ff7fe release 2013.05.13 2013-05-13 09:38:27 +02:00
Philipp Hagemeister
f45c185fa9 Do not re-encode / to # if / is a platform separator, and correctly handle permission errors (Fixes #831) 2013-05-13 09:20:08 +02:00
Philipp Hagemeister
1bd96c3a60 Deprecate --only-sub 2013-05-13 09:06:18 +02:00
Jaime Marquínez Ferrándiz
929f85d851 Remove a print call used for debugging 2013-05-12 20:56:54 +02:00
Jaime Marquínez Ferrándiz
98d4a4e6bc YoutubeSearchIE: return a playlist (related #838) 2013-05-12 20:53:37 +02:00
Jaime Marquínez Ferrándiz
fb2f83360c FFmpegPostProcessor: decode stderr first and then get the last line (closes #837) 2013-05-12 19:08:32 +02:00
Jaime Marquínez Ferrándiz
3c5e7729e1 GoogleSearchIE: change query urls to http://www.google.com/search
The old one was giving HTTP 404 errors
2013-05-12 18:44:56 +02:00
Jaime Marquínez Ferrándiz
5a853e1423 Fix YahooSearchIE: (closes #300) 2013-05-12 17:49:35 +02:00
Jaime Marquínez Ferrándiz
2f58b12dad YahooIE: support more videos 2013-05-12 17:05:43 +02:00
Jaime Marquínez Ferrándiz
59f4fd4dc6 YahooIE: remove old code and accept screen.yahoo.com videos (#300)
Videos require rtmpdump
2013-05-12 14:05:14 +02:00
chocolateboy
5738240ee8 only install to /etc if PREFIX is /usr or /usr/local 2013-05-10 23:05:58 +01:00
Philipp Hagemeister
86fd453ea8 Merge remote-tracking branch 'origin/master' 2013-05-10 09:21:24 +02:00
Philipp Hagemeister
c83411b9ee Skip bandcamp tests for now - free limit has been exceeded 2013-05-10 09:10:34 +02:00
Jaime Marquínez Ferrándiz
057c9938a1 Import FileDownloader in test_youtube_subtitles
Fix last commit
2013-05-10 08:37:49 +02:00
Jaime Marquínez Ferrándiz
9259966132 test_youtube_subtitles: FakeDownloader inherits from FileDownloader 2013-05-10 08:31:30 +02:00
Philipp Hagemeister
b08980412e Merge pull request #826 from jakeogh/master
Added --get-id option to print video IDs
2013-05-09 16:52:54 -07:00
Philipp Hagemeister
532a1e0429 release 2013.05.10 2013-05-10 01:45:21 +02:00
Filippo Valsorda
2a36c352a0 Retry to disable YT ratelimit to unlock full bandwidth
This is the second attempt: a60b854d90
Sometimes the ratelimit=yes is already in the URL, and doubling it
leads to a 403. Now should work on all videos, at least works on all
I could test.

Closes #648
2013-05-09 00:39:10 +02:00
jakeogh
1a2adf3f49 added --get-id option to print video IDs 2013-05-05 22:30:07 -07:00
Jaime Marquínez Ferrándiz
43b62accbb GoogleSearchIE: rename _download_n_results to _get_n_results 2013-05-05 22:12:41 +02:00
Jaime Marquínez Ferrándiz
be74864ace Credit @JohnyMoSwag for WorldstarhiphopIE (#730) 2013-05-05 21:56:38 +02:00
Philipp Hagemeister
0ae456f08a Credit @julienfr112 for Ina IE (#823) 2013-05-05 21:35:50 +02:00
10 changed files with 629 additions and 292 deletions

View File

@@ -9,9 +9,19 @@ cleanall: clean
PREFIX=/usr/local PREFIX=/usr/local
BINDIR=$(PREFIX)/bin BINDIR=$(PREFIX)/bin
MANDIR=$(PREFIX)/man MANDIR=$(PREFIX)/man
SYSCONFDIR=/etc
PYTHON=/usr/bin/env python PYTHON=/usr/bin/env python
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
ifeq ($(PREFIX),/usr)
SYSCONFDIR=/etc
else
ifeq ($(PREFIX),/usr/local)
SYSCONFDIR=/etc
else
SYSCONFDIR=$(PREFIX)/etc
endif
endif
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
install -d $(DESTDIR)$(BINDIR) install -d $(DESTDIR)$(BINDIR)
install -m 755 youtube-dl $(DESTDIR)$(BINDIR) install -m 755 youtube-dl $(DESTDIR)$(BINDIR)

View File

@@ -94,6 +94,7 @@ which means you can modify it, redistribute it or use it however you like.
--skip-download do not download the video --skip-download do not download the video
-g, --get-url simulate, quiet but print URL -g, --get-url simulate, quiet but print URL
-e, --get-title simulate, quiet but print title -e, --get-title simulate, quiet but print title
--get-id simulate, quiet but print id
--get-thumbnail simulate, quiet but print thumbnail URL --get-thumbnail simulate, quiet but print thumbnail URL
--get-description simulate, quiet but print video description --get-description simulate, quiet but print video description
--get-filename simulate, quiet but print output filename --get-filename simulate, quiet but print output filename
@@ -115,7 +116,7 @@ which means you can modify it, redistribute it or use it however you like.
-F, --list-formats list all available formats (currently youtube -F, --list-formats list all available formats (currently youtube
only) only)
--write-sub write subtitle file (currently youtube only) --write-sub write subtitle file (currently youtube only)
--only-sub downloads only the subtitles (no video) --only-sub [deprecated] alias of --skip-download
--all-subs downloads all the available subtitles of the --all-subs downloads all the available subtitles of the
video (currently youtube only) video (currently youtube only)
--list-subs lists all available subtitles for the video --list-subs lists all available subtitles for the video

View File

@@ -12,6 +12,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.InfoExtractors import YoutubeIE from youtube_dl.InfoExtractors import YoutubeIE
from youtube_dl.utils import * from youtube_dl.utils import *
from youtube_dl import FileDownloader
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
@@ -24,7 +25,7 @@ proxy_handler = compat_urllib_request.ProxyHandler()
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
compat_urllib_request.install_opener(opener) compat_urllib_request.install_opener(opener)
class FakeDownloader(object): class FakeDownloader(FileDownloader):
def __init__(self): def __init__(self):
self.result = [] self.result = []
self.params = parameters self.params = parameters

View File

@@ -152,7 +152,8 @@
"file": "20274954.flv", "file": "20274954.flv",
"md5": "088f151799e8f572f84eb62f17d73e5c", "md5": "088f151799e8f572f84eb62f17d73e5c",
"info_dict": { "info_dict": {
"title": "Young Americans for Liberty February 7, 2012 2:28 AM" "title": "Young Americans for Liberty February 7, 2012 2:28 AM",
"uploader": "Young Americans for Liberty"
} }
}, },
{ {
@@ -401,7 +402,8 @@
"md5":"cdeb30cdae1921719a3cbcab696ef53c", "md5":"cdeb30cdae1921719a3cbcab696ef53c",
"info_dict": { "info_dict": {
"title":"youtube-dl test song \"'/\\ä↭" "title":"youtube-dl test song \"'/\\ä↭"
} },
"skip": "There is a limit of 200 free downloads / month for the test song"
}, },
{ {
"name": "RedTube", "name": "RedTube",
@@ -429,5 +431,56 @@
"info_dict":{ "info_dict":{
"title":"François Hollande \"Je crois que c'est clair\"" "title":"François Hollande \"Je crois que c'est clair\""
} }
},
{
"name": "Yahoo",
"url": "http://screen.yahoo.com/obama-celebrates-iraq-victory-27592561.html",
"file": "27592561.flv",
"md5": "c6179bed843512823fd284fa2e7f012d",
"info_dict": {
"title": "Obama Celebrates Iraq Victory"
},
"skip": "Requires rtmpdump"
},
{
"name": "Howcast",
"url": "http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly",
"file": "390161.mp4",
"md5": "1d7ba54e2c9d7dc6935ef39e00529138",
"info_dict":{
"title":"How to Tie a Square Knot Properly",
"description":"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot."
}
},
{
"name": "Vine",
"url": "https://vine.co/v/b9KOOWX7HUx",
"file": "b9KOOWX7HUx.mp4",
"md5": "2f36fed6235b16da96ce9b4dc890940d",
"info_dict":{
"title": "Chicken.",
"uploader": "Jack Dorsey"
}
},
{
"name": "Flickr",
"url": "http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/",
"file": "5645318632.mp4",
"md5": "6fdc01adbc89d72fc9c4f15b4a4ba87b",
"info_dict":{
"title": "Dark Hollow Waterfalls",
"uploader_id": "forestwander-nature-pictures",
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up."
}
},
{
"name": "Teamcoco",
"url": "http://teamcoco.com/video/louis-ck-interview-george-w-bush",
"file": "19705.mp4",
"md5": "27b6f7527da5acf534b15f21b032656e",
"info_dict":{
"title": "Louis C.K. Interview Pt. 1 11/3/11",
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one."
}
} }
] ]

View File

@@ -54,6 +54,7 @@ class FileDownloader(object):
quiet: Do not print messages to stdout. quiet: Do not print messages to stdout.
forceurl: Force printing final URL. forceurl: Force printing final URL.
forcetitle: Force printing title. forcetitle: Force printing title.
forceid: Force printing ID.
forcethumbnail: Force printing thumbnail URL. forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description. forcedescription: Force printing description.
forcefilename: Force printing final filename. forcefilename: Force printing final filename.
@@ -82,7 +83,6 @@ class FileDownloader(object):
writeinfojson: Write the video description to a .info.json file writeinfojson: Write the video description to a .info.json file
writethumbnail: Write the thumbnail image to a file writethumbnail: Write the thumbnail image to a file
writesubtitles: Write the video subtitles to a file writesubtitles: Write the video subtitles to a file
onlysubtitles: Downloads only the subtitles of the video
allsubtitles: Downloads all the subtitles of the video allsubtitles: Downloads all the subtitles of the video
listsubtitles: Lists all available subtitles for the video listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [sbv/srt] (default=srt) subtitlesformat: Subtitle format [sbv/srt] (default=srt)
@@ -92,6 +92,7 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size max_filesize: Skip files larger than this size
daterange: A DateRange object, download only if the upload_date is in the range. daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
""" """
params = None params = None
@@ -435,10 +436,11 @@ class FileDownloader(object):
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
return None return None
def extract_info(self, url, download=True, ie_key=None): def extract_info(self, url, download=True, ie_key=None, extra_info={}):
''' '''
Returns a list with a dictionary for each video we find. Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos. If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
''' '''
if ie_key: if ie_key:
@@ -462,10 +464,14 @@ class FileDownloader(object):
break break
if isinstance(ie_result, list): if isinstance(ie_result, list):
# Backwards compatibility: old IE result format # Backwards compatibility: old IE result format
for result in ie_result:
result.update(extra_info)
ie_result = { ie_result = {
'_type': 'compat_list', '_type': 'compat_list',
'entries': ie_result, 'entries': ie_result,
} }
else:
ie_result.update(extra_info)
if 'extractor' not in ie_result: if 'extractor' not in ie_result:
ie_result['extractor'] = ie.IE_NAME ie_result['extractor'] = ie.IE_NAME
return self.process_ie_result(ie_result, download=download) return self.process_ie_result(ie_result, download=download)
@@ -481,7 +487,7 @@ class FileDownloader(object):
else: else:
self.report_error(u'no suitable InfoExtractor: %s' % url) self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True): def process_ie_result(self, ie_result, download=True, extra_info={}):
""" """
Take the result of the ie(may be modified) and resolve all unresolved Take the result of the ie(may be modified) and resolve all unresolved
references (URLs, playlist items). references (URLs, playlist items).
@@ -500,7 +506,12 @@ class FileDownloader(object):
self.process_info(ie_result) self.process_info(ie_result)
return ie_result return ie_result
elif result_type == 'url': elif result_type == 'url':
return self.extract_info(ie_result['url'], download, ie_key=ie_result.get('ie_key')) # We have to add extra_info to the results because it may be
# contained in a playlist
return self.extract_info(ie_result['url'],
download,
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'playlist': elif result_type == 'playlist':
# We process each entry in the playlist # We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None) playlist = ie_result.get('title', None) or ie_result.get('id', None)
@@ -524,9 +535,13 @@ class FileDownloader(object):
for i,entry in enumerate(entries,1): for i,entry in enumerate(entries,1):
self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
entry['playlist'] = playlist extra = {
entry['playlist_index'] = i + playliststart 'playlist': playlist,
entry_result = self.process_ie_result(entry, download=download) 'playlist_index': i + playliststart,
}
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
playlist_results.append(entry_result) playlist_results.append(entry_result)
ie_result['entries'] = playlist_results ie_result['entries'] = playlist_results
return ie_result return ie_result
@@ -574,6 +589,8 @@ class FileDownloader(object):
# Forced printings # Forced printings
if self.params.get('forcetitle', False): if self.params.get('forcetitle', False):
compat_print(info_dict['title']) compat_print(info_dict['title'])
if self.params.get('forceid', False):
compat_print(info_dict['id'])
if self.params.get('forceurl', False): if self.params.get('forceurl', False):
compat_print(info_dict['url']) compat_print(info_dict['url'])
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
@@ -594,7 +611,7 @@ class FileDownloader(object):
try: try:
dn = os.path.dirname(encodeFilename(filename)) dn = os.path.dirname(encodeFilename(filename))
if dn != '' and not os.path.exists(dn): # dn is already encoded if dn != '' and not os.path.exists(dn):
os.makedirs(dn) os.makedirs(dn)
except (OSError, IOError) as err: except (OSError, IOError) as err:
self.report_error(u'unable to create directory ' + compat_str(err)) self.report_error(u'unable to create directory ' + compat_str(err))
@@ -627,8 +644,6 @@ class FileDownloader(object):
except (OSError, IOError): except (OSError, IOError):
self.report_error(u'Cannot write subtitles file ' + descfn) self.report_error(u'Cannot write subtitles file ' + descfn)
return return
if self.params.get('onlysubtitles', False):
return
if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
subtitles = info_dict['subtitles'] subtitles = info_dict['subtitles']
@@ -646,8 +661,6 @@ class FileDownloader(object):
except (OSError, IOError): except (OSError, IOError):
self.report_error(u'Cannot write subtitles file ' + descfn) self.report_error(u'Cannot write subtitles file ' + descfn)
return return
if self.params.get('onlysubtitles', False):
return
if self.params.get('writeinfojson', False): if self.params.get('writeinfojson', False):
infofn = filename + u'.info.json' infofn = filename + u'.info.json'
@@ -735,7 +748,7 @@ class FileDownloader(object):
except (IOError, OSError): except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file') self.report_warning(u'Unable to remove downloaded video file')
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path): def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
@@ -750,12 +763,15 @@ class FileDownloader(object):
# the connection was interrumpted and resuming appears to be # the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK. # possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename] basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
if self.params.get('verbose', False): basic_args[1] = '-v'
if player_url is not None: if player_url is not None:
basic_args += ['-W', player_url] basic_args += ['-W', player_url]
if page_url is not None: if page_url is not None:
basic_args += ['--pageUrl', page_url] basic_args += ['--pageUrl', page_url]
if play_path is not None: if play_path is not None:
basic_args += ['-y', play_path] basic_args += ['-y', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', url]
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)] args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False): if self.params.get('verbose', False):
try: try:
@@ -811,7 +827,8 @@ class FileDownloader(object):
return self._download_with_rtmpdump(filename, url, return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None), info_dict.get('player_url', None),
info_dict.get('page_url', None), info_dict.get('page_url', None),
info_dict.get('play_path', None)) info_dict.get('play_path', None),
info_dict.get('tc_url', None))
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
stream = None stream = None

View File

@@ -16,6 +16,9 @@ import xml.etree.ElementTree
import random import random
import math import math
import operator import operator
import hashlib
import binascii
import urllib
from .utils import * from .utils import *
@@ -188,6 +191,45 @@ class InfoExtractor(object):
video_info['title'] = playlist_title video_info['title'] = playlist_title
return video_info return video_info
class SearchInfoExtractor(InfoExtractor):
    """
    Base class for paged search queries extractors.

    They accept urls in the format _SEARCH_KEY(|all|[1-9][0-9]*):{query}
    where the part between the key and the colon selects how many results
    are requested: empty means one result, 'all' means _MAX_RESULTS, and a
    positive integer means that many (capped at _MAX_RESULTS).

    Subclasses should define _SEARCH_KEY and _MAX_RESULTS and must
    implement _get_n_results.
    """

    @classmethod
    def _make_valid_url(cls):
        """Return the regex matching search queries for this extractor.

        NOTE(review): _SEARCH_KEY is interpolated into the pattern verbatim,
        so it is presumably expected to contain no regex metacharacters.
        """
        return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY

    @classmethod
    def suitable(cls, url):
        """Return True if url is a search query this extractor handles."""
        return re.match(cls._make_valid_url(), url) is not None

    def _real_extract(self, query):
        """Parse the search query and delegate to _get_n_results.

        Raises ExtractorError if the query does not match the expected
        format or asks for a non-positive number of results.
        """
        mobj = re.match(self._make_valid_url(), query)
        if mobj is None:
            raise ExtractorError(u'Invalid search query "%s"' % query)

        prefix = mobj.group('prefix')
        query = mobj.group('query')
        if prefix == '':
            # No count given: fetch a single result
            return self._get_n_results(query, 1)
        elif prefix == 'all':
            return self._get_n_results(query, self._MAX_RESULTS)
        else:
            n = int(prefix)
            if n <= 0:
                # Defensive check: the pattern built by _make_valid_url
                # only admits positive integers here.
                raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
            elif n > self._MAX_RESULTS:
                # Cap the request instead of failing, but tell the user
                self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
                n = self._MAX_RESULTS
            return self._get_n_results(query, n)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by subclasses")
class YoutubeIE(InfoExtractor): class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com.""" """Information extractor for youtube.com."""
@@ -610,10 +652,13 @@ class YoutubeIE(InfoExtractor):
self.report_rtmp_download() self.report_rtmp_download()
video_url_list = [(None, video_info['conn'][0])] video_url_list = [(None, video_info['conn'][0])]
elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') url_map = {}
url_data = [compat_parse_qs(uds) for uds in url_data_strs] for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
url_data = [ud for ud in url_data if 'itag' in ud and 'url' in ud] url_data = compat_parse_qs(url_data_str)
url_map = dict((ud['itag'][0], ud['url'][0] + '&signature=' + ud['sig'][0]) for ud in url_data) if 'itag' in url_data and 'url' in url_data:
url = url_data['url'][0] + '&signature=' + url_data['sig'][0]
if not 'ratebypass' in url: url += '&ratebypass=yes'
url_map[url_data['itag'][0]] = url
format_limit = self._downloader.params.get('format_limit', None) format_limit = self._downloader.params.get('format_limit', None)
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
@@ -909,123 +954,72 @@ class PhotobucketIE(InfoExtractor):
class YahooIE(InfoExtractor): class YahooIE(InfoExtractor):
"""Information extractor for video.yahoo.com.""" """Information extractor for screen.yahoo.com."""
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
_WORKING = False def _real_extract(self, url):
# _VALID_URL matches all Yahoo! Video URLs
# _VPAGE_URL matches only the extractable '/watch/' URLs
_VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
IE_NAME = u'video.yahoo'
def _real_extract(self, url, new_video=True):
# Extract ID from URL
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None: if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage)
video_id = mobj.group(2) if m_id is None:
video_extension = 'flv' # TODO: Check which url parameters are required
info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage')
info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
<description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
<media:pubStart><!\[CDATA\[(?P<date>.*?)\ .*\]\]></media:pubStart>.*
<media:content\ medium="image"\ url="(?P<thumb>.*?)"\ name="LARGETHUMB"
'''
self.report_extraction(video_id)
m_info = re.search(info_re, webpage, re.VERBOSE|re.DOTALL)
if m_info is None:
raise ExtractorError(u'Unable to extract video info')
video_title = m_info.group('title')
video_description = m_info.group('description')
video_thumb = m_info.group('thumb')
video_date = m_info.group('date')
video_date = datetime.datetime.strptime(video_date, '%m/%d/%Y').strftime('%Y%m%d')
# TODO: Find a way to get mp4 videos
rest_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;element=stream;outputformat=mrss;id=%s;lmsoverride=1;bw=375;dynamicstream=1;cb=83521105;tech=flv,mp4;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
webpage = self._download_webpage(rest_url, video_id, u'Downloading video url webpage')
m_rest = re.search(r'<media:content url="(?P<url>.*?)" path="(?P<path>.*?)"', webpage)
video_url = m_rest.group('url')
video_path = m_rest.group('path')
if m_rest is None:
raise ExtractorError(u'Unable to extract video url')
# Rewrite valid but non-extractable URLs as else: # We have to use a different method if another id is defined
# extractable English language /watch/ URLs long_id = m_id.group('new_id')
if re.match(self._VPAGE_URL, url) is None: info_url = 'http://video.query.yahoo.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.media.video.streams%20WHERE%20id%3D%22' + long_id + '%22%20AND%20format%3D%22mp4%2Cflv%22%20AND%20protocol%3D%22rtmp%2Chttp%22%20AND%20plrs%3D%2286Gj0vCaSzV_Iuf6hNylf2%22%20AND%20acctid%3D%22389%22%20AND%20plidl%3D%22%22%20AND%20pspid%3D%22792700001%22%20AND%20offnetwork%3D%22false%22%20AND%20site%3D%22ivy%22%20AND%20lang%3D%22en-US%22%20AND%20region%3D%22US%22%20AND%20override%3D%22none%22%3B&env=prod&format=json&callback=YUI.Env.JSONP.yui_3_8_1_1_1368368376830_335'
request = compat_urllib_request.Request(url) webpage = self._download_webpage(info_url, video_id, u'Downloading info json')
try: json_str = re.search(r'YUI.Env.JSONP.yui.*?\((.*?)\);', webpage).group(1)
webpage = compat_urllib_request.urlopen(request).read() info = json.loads(json_str)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: res = info[u'query'][u'results'][u'mediaObj'][0]
raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err)) stream = res[u'streams'][0]
video_path = stream[u'path']
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) video_url = stream[u'host']
if mobj is None: meta = res[u'meta']
raise ExtractorError(u'Unable to extract id field') video_title = meta[u'title']
yahoo_id = mobj.group(1) video_description = meta[u'description']
video_thumb = meta[u'thumbnail']
mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage) video_date = None # I can't find it
if mobj is None:
raise ExtractorError(u'Unable to extract vid field')
yahoo_vid = mobj.group(1)
url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
return self._real_extract(url, new_video=False)
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url)
try:
self.report_download_webpage(video_id)
webpage = compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err))
# Extract uploader and title from webpage
self.report_extraction(video_id)
mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video title')
video_title = mobj.group(1).decode('utf-8')
mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video uploader')
video_uploader = mobj.group(1).decode('utf-8')
# Extract video thumbnail
mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video thumbnail')
video_thumbnail = mobj.group(1).decode('utf-8')
# Extract video description
mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video description')
video_description = mobj.group(1).decode('utf-8')
if not video_description:
video_description = 'No description available.'
# Extract video height and width
mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video height')
yv_video_height = mobj.group(1)
mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video width')
yv_video_width = mobj.group(1)
# Retrieve video playlist to extract media URL
# I'm not completely sure what all these options are, but we
# seem to need most of them, otherwise the server sends a 401.
yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
yv_bitrate = '700' # according to Wikipedia this is hard-coded
request = compat_urllib_request.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
try:
self.report_download_webpage(video_id)
webpage = compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err))
# Extract media URL from playlist XML
mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract media URL')
video_url = compat_urllib_parse.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
video_url = unescapeHTML(video_url)
return [{
'id': video_id.decode('utf-8'),
'url': video_url,
'uploader': video_uploader,
'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description,
}]
info_dict = {
'id': video_id,
'url': video_url,
'play_path': video_path,
'title':video_title,
'description': video_description,
'thumbnail': video_thumb,
'upload_date': video_date,
'ext': 'flv',
}
return info_dict
class VimeoIE(InfoExtractor): class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com.""" """Information extractor for vimeo.com."""
@@ -1313,6 +1307,8 @@ class GenericIE(InfoExtractor):
opener.add_handler(handler()) opener.add_handler(handler())
response = opener.open(HeadRequest(url)) response = opener.open(HeadRequest(url))
if response is None:
raise ExtractorError(u'Invalid URL protocol')
new_url = response.geturl() new_url = response.geturl()
if url == new_url: if url == new_url:
@@ -1384,42 +1380,18 @@ class GenericIE(InfoExtractor):
}] }]
class YoutubeSearchIE(InfoExtractor): class YoutubeSearchIE(SearchInfoExtractor):
"""Information Extractor for YouTube search queries.""" """Information Extractor for YouTube search queries."""
_VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
_max_youtube_results = 1000 _MAX_RESULTS = 1000
IE_NAME = u'youtube:search' IE_NAME = u'youtube:search'
_SEARCH_KEY = 'ytsearch'
def report_download_page(self, query, pagenum): def report_download_page(self, query, pagenum):
"""Report attempt to download search page with given number.""" """Report attempt to download search page with given number."""
query = query.decode(preferredencoding()) query = query.decode(preferredencoding())
self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
def _real_extract(self, query):
mobj = re.match(self._VALID_URL, query)
if mobj is None:
raise ExtractorError(u'Invalid search query "%s"' % query)
prefix, query = query.split(':')
prefix = prefix[8:]
query = query.encode('utf-8')
if prefix == '':
return self._get_n_results(query, 1)
elif prefix == 'all':
self._get_n_results(query, self._max_youtube_results)
else:
try:
n = int(prefix)
if n <= 0:
raise ExtractorError(u'Invalid download number %s for query "%s"' % (n, query))
elif n > self._max_youtube_results:
self._downloader.report_warning(u'ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
n = self._max_youtube_results
return self._get_n_results(query, n)
except ValueError: # parsing prefix as integer fails
return self._get_n_results(query, 1)
def _get_n_results(self, query, n): def _get_n_results(self, query, n):
"""Get a specified number of results for a query""" """Get a specified number of results for a query"""
@@ -1449,36 +1421,18 @@ class YoutubeSearchIE(InfoExtractor):
if len(video_ids) > n: if len(video_ids) > n:
video_ids = video_ids[:n] video_ids = video_ids[:n]
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
return videos return self.playlist_result(videos, query)
class GoogleSearchIE(InfoExtractor): class GoogleSearchIE(SearchInfoExtractor):
"""Information Extractor for Google Video search queries.""" """Information Extractor for Google Video search queries."""
_VALID_URL = r'gvsearch(?P<prefix>|\d+|all):(?P<query>[\s\S]+)' _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
_MORE_PAGES_INDICATOR = r'class="pn" id="pnnext"' _MAX_RESULTS = 1000
_max_google_results = 1000
IE_NAME = u'video.google:search' IE_NAME = u'video.google:search'
_SEARCH_KEY = 'gvsearch'
def _real_extract(self, query): def _get_n_results(self, query, n):
mobj = re.match(self._VALID_URL, query) """Get a specified number of results for a query"""
prefix = mobj.group('prefix')
query = mobj.group('query')
if prefix == '':
return self._download_n_results(query, 1)
elif prefix == 'all':
return self._download_n_results(query, self._max_google_results)
else:
n = int(prefix)
if n <= 0:
raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
elif n > self._max_google_results:
self._downloader.report_warning(u'gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
n = self._max_google_results
return self._download_n_results(query, n)
def _download_n_results(self, query, n):
"""Downloads a specified number of results for a query"""
res = { res = {
'_type': 'playlist', '_type': 'playlist',
@@ -1487,7 +1441,7 @@ class GoogleSearchIE(InfoExtractor):
} }
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
result_url = u'http://video.google.com/videosearch?q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10) result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
webpage = self._download_webpage(result_url, u'gvsearch:' + query, webpage = self._download_webpage(result_url, u'gvsearch:' + query,
note='Downloading result page ' + str(pagenum)) note='Downloading result page ' + str(pagenum))
@@ -1501,84 +1455,39 @@ class GoogleSearchIE(InfoExtractor):
if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage): if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
return res return res
class YahooSearchIE(InfoExtractor): class YahooSearchIE(SearchInfoExtractor):
"""Information Extractor for Yahoo! Video search queries.""" """Information Extractor for Yahoo! Video search queries."""
_WORKING = False _MAX_RESULTS = 1000
_VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+' IE_NAME = u'screen.yahoo:search'
_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' _SEARCH_KEY = 'yvsearch'
_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
_MORE_PAGES_INDICATOR = r'\s*Next'
_max_yahoo_results = 1000
IE_NAME = u'video.yahoo:search'
def report_download_page(self, query, pagenum): def _get_n_results(self, query, n):
"""Report attempt to download playlist page with given number.""" """Get a specified number of results for a query"""
query = query.decode(preferredencoding())
self.to_screen(u'query "%s": Downloading page %s' % (query, pagenum))
def _real_extract(self, query): res = {
mobj = re.match(self._VALID_URL, query) '_type': 'playlist',
if mobj is None: 'id': query,
raise ExtractorError(u'Invalid search query "%s"' % query) 'entries': []
}
for pagenum in itertools.count(0):
result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
webpage = self._download_webpage(result_url, query,
note='Downloading results page '+str(pagenum+1))
info = json.loads(webpage)
m = info[u'm']
results = info[u'results']
prefix, query = query.split(':') for (i, r) in enumerate(results):
prefix = prefix[8:] if (pagenum * 30) +i >= n:
query = query.encode('utf-8') break
if prefix == '': mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
self._download_n_results(query, 1) e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
return res['entries'].append(e)
elif prefix == 'all': if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1 )):
self._download_n_results(query, self._max_yahoo_results) break
return
else:
try:
n = int(prefix)
if n <= 0:
raise ExtractorError(u'Invalid download number %s for query "%s"' % (n, query))
elif n > self._max_yahoo_results:
self._downloader.report_warning(u'yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
n = self._max_yahoo_results
self._download_n_results(query, n)
return
except ValueError: # parsing prefix as integer fails
self._download_n_results(query, 1)
return
def _download_n_results(self, query, n): return res
"""Downloads a specified number of results for a query"""
video_ids = []
already_seen = set()
pagenum = 1
while True:
self.report_download_page(query, pagenum)
result_url = self._TEMPLATE_URL % (compat_urllib_parse.quote_plus(query), pagenum)
request = compat_urllib_request.Request(result_url)
try:
page = compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download webpage: %s' % compat_str(err))
# Extract video identifiers
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
video_id = mobj.group(1)
if video_id not in already_seen:
video_ids.append(video_id)
already_seen.add(video_id)
if len(video_ids) == n:
# Specified n videos reached
for id in video_ids:
self._downloader.download(['http://video.yahoo.com/watch/%s' % id])
return
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
for id in video_ids:
self._downloader.download(['http://video.yahoo.com/watch/%s' % id])
return
pagenum = pagenum + 1
class YoutubePlaylistIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor):
@@ -2073,37 +1982,158 @@ class MyVideoIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo' IE_NAME = u'myvideo'
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
# Released into the Public Domain by Tristan Fischer on 2013-05-19
# https://github.com/rg3/youtube-dl/pull/842
def __rc4crypt(self, data, key):
    """Run the RC4 stream cipher over ``data`` with ``key``.

    Both ``data`` and ``key`` are iterated/indexed element by element and
    every element is passed through ``compat_ord``, so elements may be
    either integers or single characters.

    Returns a text string built from ``chr()`` of each output byte value.
    Raises ZeroDivisionError if ``key`` is empty (``i % len(key)``).
    """
    # Key-scheduling: permute the 256-entry state box according to the key.
    box = list(range(256))
    key_length = len(key)
    j = 0
    for i in range(256):
        j = (j + box[i] + compat_ord(key[i % key_length])) % 256
        box[i], box[j] = box[j], box[i]

    # Keystream generation: XOR every input element with the next keystream
    # value.  Pieces are collected in a list and joined once at the end
    # instead of growing a string with repeated ``+=``.
    i = 0
    j = 0
    pieces = []
    for char in data:
        i = (i + 1) % 256
        j = (j + box[i]) % 256
        box[i], box[j] = box[j], box[i]
        pieces.append(chr(compat_ord(char) ^ box[(box[i] + box[j]) % 256]))
    return ''.join(pieces)
def __md5(self, s):
    """Return the hexadecimal MD5 digest of ``s``, passed through ``.encode()``.

    ``s`` is handed directly to ``hashlib.md5``; the hex digest string is
    then encoded with the default codec of ``str.encode``.
    """
    hex_digest = hashlib.md5(s).hexdigest()
    return hex_digest.encode()
def _real_extract(self,url): def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None: if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'invalid URL: %s' % url)
video_id = mobj.group(1) video_id = mobj.group(1)
GK = (
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
b'TnpsbA0KTVRkbU1tSTRNdz09'
)
# Get video webpage # Get video webpage
webpage_url = 'http://www.myvideo.de/watch/%s' % video_id webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
webpage = self._download_webpage(webpage_url, video_id) webpage = self._download_webpage(webpage_url, video_id)
mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
if mobj is not None:
self.report_extraction(video_id)
video_url = mobj.group(1) + '.flv'
mobj = re.search('<title>([^<]+)</title>', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract title')
video_title = mobj.group(1)
mobj = re.search('[.](.+?)$', video_url)
if mobj is None:
raise ExtractorError(u'Unable to extract extention')
video_ext = mobj.group(1)
return [{
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': u'flv',
}]
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video')
params = {}
encxml = ''
sec = mobj.group(1)
for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
if not a == '_encxml':
params[a] = b
else:
encxml = compat_urllib_parse.unquote(b)
if not params.get('domain'):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
self._downloader.report_warning(u'avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
) % video_id
# get enc data
enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
enc_data_b = binascii.unhexlify(enc_data)
sk = self.__md5(
base64.b64decode(base64.b64decode(GK)) +
self.__md5(
str(video_id).encode('utf-8')
)
)
dec_data = self.__rc4crypt(enc_data_b, sk)
# extracting infos
self.report_extraction(video_id) self.report_extraction(video_id)
mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/.*?\.jpg\'',
webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract media URL')
video_url = mobj.group(1) + ('/%s.flv' % video_id)
mobj = re.search('<title>([^<]+)</title>', webpage) mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
if mobj is None: if mobj is None:
raise ExtractorError(u'Unable to extract title') raise ExtractorError(u'unable to extract rtmpurl')
video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1))
if 'myvideo2flash' in video_rtmpurl:
self._downloader.report_warning(u'forcing RTMPT ...')
video_rtmpurl = video_rtmpurl.replace('rtmpe://', 'rtmpt://')
# extract non rtmp videos
if (video_rtmpurl is None) or (video_rtmpurl == ''):
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract url')
video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
mobj = re.search('source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract swfobj')
video_file = compat_urllib_parse.unquote(mobj.group(1))
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_playpath = '%s:%s' % (prefix, ppath)
video_hls_playlist = ''
else:
video_playpath = ''
video_hls_playlist = (
video_filepath + video_file
).replace('.f4m', '.m3u8')
mobj = re.search('swfobject.embedSWF\(\'(.+?)\'', webpage)
if mobj is None:
raise ExtractorError(u'unable to extract swfobj')
video_swfobj = compat_urllib_parse.unquote(mobj.group(1))
mobj = re.search("<h1(?: class='globalHd')?>(.*?)</h1>", webpage)
if mobj is None:
raise ExtractorError(u'unable to extract title')
video_title = mobj.group(1) video_title = mobj.group(1)
return [{ return [{
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_rtmpurl,
'uploader': None, 'tc_url': video_rtmpurl,
'upload_date': None, 'uploader': None,
'title': video_title, 'upload_date': None,
'ext': u'flv', 'title': video_title,
'ext': u'flv',
'play_path': video_playpath,
'video_file': video_file,
'video_hls_playlist': video_hls_playlist,
'player_url': video_swfobj,
}] }]
class ComedyCentralIE(InfoExtractor): class ComedyCentralIE(InfoExtractor):
@@ -3395,18 +3425,26 @@ class UstreamIE(InfoExtractor):
video_id = m.group('videoID') video_id = m.group('videoID')
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
m = re.search(r'data-title="(?P<title>.+)"',webpage) self.report_extraction(video_id)
title = m.group('title') try:
m = re.search(r'<a class="state" data-content-type="channel" data-content-id="(?P<uploader>\d+)"',webpage) m = re.search(r'data-title="(?P<title>.+)"',webpage)
uploader = m.group('uploader') title = m.group('title')
m = re.search(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
webpage, re.DOTALL)
uploader = unescapeHTML(m.group('uploader').strip())
m = re.search(r'<link rel="image_src" href="(?P<thumb>.*?)"', webpage)
thumb = m.group('thumb')
except AttributeError:
raise ExtractorError(u'Unable to extract info')
info = { info = {
'id':video_id, 'id':video_id,
'url':video_url, 'url':video_url,
'ext': 'flv', 'ext': 'flv',
'title': title, 'title': title,
'uploader': uploader 'uploader': uploader,
'thumbnail': thumb,
} }
return [info] return info
class WorldStarHipHopIE(InfoExtractor): class WorldStarHipHopIE(InfoExtractor):
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)' _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
@@ -4028,7 +4066,7 @@ class TumblrIE(InfoExtractor):
re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id) re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
video = re.search(re_video, webpage) video = re.search(re_video, webpage)
if video is None: if video is None:
self.to_screen("No video founded") self.to_screen("No video found")
return [] return []
video_url = video.group('video_url') video_url = video.group('video_url')
ext = video.group('ext') ext = video.group('ext')
@@ -4127,7 +4165,7 @@ class RedTubeIE(InfoExtractor):
class InaIE(InfoExtractor): class InaIE(InfoExtractor):
"""Information Extractor for Ina.fr""" """Information Extractor for Ina.fr"""
_VALID_URL = r'(?:http://)?(?:www.)?ina\.fr/video/(?P<id>I[0-9]+)/.*' _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
def _real_extract(self,url): def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -4154,6 +4192,201 @@ class InaIE(InfoExtractor):
'title': video_title, 'title': video_title,
}] }]
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        """Extract the video described by a howcast.com video URL.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title``, ``description`` and ``thumbnail``.

        Raises ExtractorError if the URL does not match ``_VALID_URL`` or
        if the video URL, title or thumbnail cannot be found in the page.
        A missing description only produces a warning.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with a proper extractor error instead of an
            # AttributeError on mobj.group() below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # Always fetch the canonical page, whatever form the input URL had.
        webpage_url = 'http://www.howcast.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        mobj = re.search(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video URL')
        video_url = mobj.group(1)

        # The content attribute may be double- or single-quoted; exactly one
        # of the two groups is set.
        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
        if mobj is None:
            # The description is optional: warn and carry on.
            self._downloader.report_warning(u'unable to extract description')
            video_description = None
        else:
            video_description = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta content=\'(.+?)\' property=\'og:image\'', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
        }]
class VineIE(InfoExtractor):
    """Information Extractor for Vine.co"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        """Extract the video described by a vine.co video URL.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title``, ``thumbnail`` and ``uploader``.

        Raises ExtractorError if the URL does not match ``_VALID_URL`` or
        if any of the video URL, title, thumbnail or uploader cannot be
        found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with a proper extractor error instead of an
            # AttributeError on mobj.group() below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # Always fetch the canonical page, whatever form the input URL had.
        webpage_url = 'https://vine.co/v/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        mobj = re.search(r'<meta property="twitter:player:stream" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video URL')
        video_url = mobj.group(1)

        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1)

        # Group 1 is the image URL; the optional second group swallows any
        # query string so it is not part of the thumbnail URL.
        mobj = re.search(r'<meta property="og:image" content="(.+?)(\?.*?)?"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1)

        mobj = re.search(r'<div class="user">.*?<h2>(.+?)</h2>', webpage, re.DOTALL)
        if mobj is None:
            raise ExtractorError(u'Unable to extract uploader')
        uploader = mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'uploader': uploader,
        }]
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        """Extract the video described by a flickr.com photo-page URL.

        The media URL is resolved in three steps: the photo page yields a
        ``photo_secret``, a first XML document yields a node id, and a
        second XML document (the playlist) yields the stream location.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title``, ``description``, ``thumbnail`` and
        ``uploader_id``.

        Raises ExtractorError if the URL does not match ``_VALID_URL`` or
        if the secret, node id, stream URL, title or thumbnail cannot be
        found.  A missing description only produces a warning.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with a proper extractor error instead of an
            # AttributeError on mobj.group() below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_uploader_id = mobj.group('uploader_id')
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        mobj = re.search(r"photo_secret: '(\w+)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video secret')
        secret = mobj.group(1)

        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')

        mobj = re.search(r'<Item id="id">(\d+-\d+)</Item>', first_xml)
        if mobj is None:
            raise ExtractorError(u'Unable to extract node_id')
        node_id = mobj.group(1)

        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        # Only the FULLPATH part is passed through unescapeHTML.
        video_url = mobj.group(1) + unescapeHTML(mobj.group(2))

        # The content attributes below may be double- or single-quoted;
        # exactly one of the two groups is set.
        mobj = re.search(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
        if mobj is None:
            # The description is optional: warn and carry on.
            self._downloader.report_warning(u'unable to extract description')
            video_description = None
        else:
            video_description = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1) or mobj.group(2)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'uploader_id': video_uploader_id,
        }]
class TeamcocoIE(InfoExtractor):
    """Information Extractor for teamcoco.com videos"""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """Extract the video described by a teamcoco.com video URL.

        The numeric video id is read from the page's ``<article>`` tag and
        used to download an XML data document that holds the media URL.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title``, ``thumbnail`` and ``description``.

        Raises ExtractorError if the URL does not match ``_VALID_URL`` or
        if the video id, title, thumbnail, description or media URL cannot
        be found.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        # The numeric id is not known yet, so the URL title labels this
        # download.
        webpage = self._download_webpage(url, url_title)

        mobj = re.search(r'<article class="video" data-id="(\d+?)"', webpage)
        if mobj is None:
            # Without this guard a page lacking the article tag would crash
            # with AttributeError on mobj.group().
            raise ExtractorError(u'Unable to extract video id')
        video_id = mobj.group(1)

        self.report_extraction(video_id)

        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1)

        mobj = re.search(r'<meta property="og:image" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1)

        # ``.*?`` (not ``.+?``): an empty description attribute is accepted.
        mobj = re.search(r'<meta property="og:description" content="(.*?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract description')
        description = mobj.group(1)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')

        mobj = re.search(r'<file type="high".*?>(.*?)</file>', data)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        video_url = mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'description': description,
        }]
def gen_extractors(): def gen_extractors():
""" Return a list of an instance of every supported extractor. """ Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL. The order does matter; the first extractor matched is the one handling the URL.
@@ -4211,6 +4444,10 @@ def gen_extractors():
BandcampIE(), BandcampIE(),
RedTubeIE(), RedTubeIE(),
InaIE(), InaIE(),
HowcastIE(),
VineIE(),
FlickrIE(),
TeamcocoIE(),
GenericIE() GenericIE()
] ]

View File

@@ -85,8 +85,9 @@ class FFmpegPostProcessor(PostProcessor):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout,stderr = p.communicate() stdout,stderr = p.communicate()
if p.returncode != 0: if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace')
msg = stderr.strip().split('\n')[-1] msg = stderr.strip().split('\n')[-1]
raise FFmpegPostProcessorError(msg.decode('utf-8', 'replace')) raise FFmpegPostProcessorError(msg)
def _ffmpeg_filename_argument(self, fn): def _ffmpeg_filename_argument(self, fn):
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details

View File

@@ -26,6 +26,8 @@ __authors__ = (
'Osama Khalid', 'Osama Khalid',
'Michael Walter', 'Michael Walter',
'M. Yasoob Ullah Khalid', 'M. Yasoob Ullah Khalid',
'Julien Fraichard',
'Johny Mo Swag',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
@@ -189,8 +191,8 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='writesubtitles', action='store_true', dest='writesubtitles',
help='write subtitle file (currently youtube only)', default=False) help='write subtitle file (currently youtube only)', default=False)
video_format.add_option('--only-sub', video_format.add_option('--only-sub',
action='store_true', dest='onlysubtitles', action='store_true', dest='skip_download',
help='downloads only the subtitles (no video)', default=False) help='[deprecated] alias of --skip-download', default=False)
video_format.add_option('--all-subs', video_format.add_option('--all-subs',
action='store_true', dest='allsubtitles', action='store_true', dest='allsubtitles',
help='downloads all the available subtitles of the video (currently youtube only)', default=False) help='downloads all the available subtitles of the video (currently youtube only)', default=False)
@@ -214,6 +216,8 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
verbosity.add_option('-e', '--get-title', verbosity.add_option('-e', '--get-title',
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
verbosity.add_option('--get-id',
action='store_true', dest='getid', help='simulate, quiet but print id', default=False)
verbosity.add_option('--get-thumbnail', verbosity.add_option('--get-thumbnail',
action='store_true', dest='getthumbnail', action='store_true', dest='getthumbnail',
help='simulate, quiet but print thumbnail URL', default=False) help='simulate, quiet but print thumbnail URL', default=False)
@@ -493,15 +497,16 @@ def _real_main(argv=None):
'usenetrc': opts.usenetrc, 'usenetrc': opts.usenetrc,
'username': opts.username, 'username': opts.username,
'password': opts.password, 'password': opts.password,
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
'forceurl': opts.geturl, 'forceurl': opts.geturl,
'forcetitle': opts.gettitle, 'forcetitle': opts.gettitle,
'forceid': opts.getid,
'forcethumbnail': opts.getthumbnail, 'forcethumbnail': opts.getthumbnail,
'forcedescription': opts.getdescription, 'forcedescription': opts.getdescription,
'forcefilename': opts.getfilename, 'forcefilename': opts.getfilename,
'forceformat': opts.getformat, 'forceformat': opts.getformat,
'simulate': opts.simulate, 'simulate': opts.simulate,
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
'format': opts.format, 'format': opts.format,
'format_limit': opts.format_limit, 'format_limit': opts.format_limit,
'listformats': opts.listformats, 'listformats': opts.listformats,
@@ -527,7 +532,6 @@ def _real_main(argv=None):
'writeinfojson': opts.writeinfojson, 'writeinfojson': opts.writeinfojson,
'writethumbnail': opts.writethumbnail, 'writethumbnail': opts.writethumbnail,
'writesubtitles': opts.writesubtitles, 'writesubtitles': opts.writesubtitles,
'onlysubtitles': opts.onlysubtitles,
'allsubtitles': opts.allsubtitles, 'allsubtitles': opts.allsubtitles,
'listsubtitles': opts.listsubtitles, 'listsubtitles': opts.listsubtitles,
'subtitlesformat': opts.subtitlesformat, 'subtitlesformat': opts.subtitlesformat,

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import errno
import gzip import gzip
import io import io
import json import json
@@ -149,6 +150,10 @@ try:
except NameError: except NameError:
compat_chr = chr compat_chr = chr
def compat_ord(c):
    """Return ``c`` itself when its type is exactly ``int``, else ``ord(c)``."""
    # Exact type comparison (not isinstance): only objects whose type is
    # int bypass ord().
    return c if type(c) is int else ord(c)
std_headers = { std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
@@ -334,12 +339,20 @@ def sanitize_open(filename, open_mode):
stream = open(encodeFilename(filename), open_mode) stream = open(encodeFilename(filename), open_mode)
return (stream, filename) return (stream, filename)
except (IOError, OSError) as err: except (IOError, OSError) as err:
# In case of error, try to remove win32 forbidden chars if err.errno in (errno.EACCES,):
filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename) raise
# An exception here should be caught in the caller # In case of error, try to remove win32 forbidden chars
stream = open(encodeFilename(filename), open_mode) alt_filename = os.path.join(
return (stream, filename) re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
for path_part in os.path.split(filename)
)
if alt_filename == filename:
raise
else:
# An exception here should be caught in the caller
stream = open(encodeFilename(filename), open_mode)
return (stream, alt_filename)
def timeconvert(timestr): def timeconvert(timestr):

View File

@@ -1,2 +1,2 @@
__version__ = '2013.05.07' __version__ = '2013.05.23'