mirror of
https://source.netsyms.com/Mirrors/youtube-dl
synced 2026-04-24 21:11:16 +00:00
Compare commits
43 Commits
2013.05.07
...
2013.05.23
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
57adeaea87 | ||
|
|
8f3f1aef05 | ||
|
|
51d2453c7a | ||
|
|
45014296be | ||
|
|
afef36c950 | ||
|
|
b31756c18e | ||
|
|
f008688520 | ||
|
|
5b68ea215b | ||
|
|
b1d568f0bc | ||
|
|
17bd1b2f41 | ||
|
|
5b0d3cc0cd | ||
|
|
d4f76f1674 | ||
|
|
340fa21198 | ||
|
|
de5d66d431 | ||
|
|
7bdb17d4d5 | ||
|
|
419c64b107 | ||
|
|
99a5ae3f8e | ||
|
|
c7563c528b | ||
|
|
e30e9318da | ||
|
|
5c51028d38 | ||
|
|
c1d58e1c67 | ||
|
|
02030ff7fe | ||
|
|
f45c185fa9 | ||
|
|
1bd96c3a60 | ||
|
|
929f85d851 | ||
|
|
98d4a4e6bc | ||
|
|
fb2f83360c | ||
|
|
3c5e7729e1 | ||
|
|
5a853e1423 | ||
|
|
2f58b12dad | ||
|
|
59f4fd4dc6 | ||
|
|
5738240ee8 | ||
|
|
86fd453ea8 | ||
|
|
c83411b9ee | ||
|
|
057c9938a1 | ||
|
|
9259966132 | ||
|
|
b08980412e | ||
|
|
532a1e0429 | ||
|
|
2a36c352a0 | ||
|
|
1a2adf3f49 | ||
|
|
43b62accbb | ||
|
|
be74864ace | ||
|
|
0ae456f08a |
12
Makefile
12
Makefile
@@ -9,9 +9,19 @@ cleanall: clean
|
||||
PREFIX=/usr/local
|
||||
BINDIR=$(PREFIX)/bin
|
||||
MANDIR=$(PREFIX)/man
|
||||
SYSCONFDIR=/etc
|
||||
PYTHON=/usr/bin/env python
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
ifeq ($(PREFIX),/usr)
|
||||
SYSCONFDIR=/etc
|
||||
else
|
||||
ifeq ($(PREFIX),/usr/local)
|
||||
SYSCONFDIR=/etc
|
||||
else
|
||||
SYSCONFDIR=$(PREFIX)/etc
|
||||
endif
|
||||
endif
|
||||
|
||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
||||
install -d $(DESTDIR)$(BINDIR)
|
||||
install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
|
||||
|
||||
@@ -94,6 +94,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--skip-download do not download the video
|
||||
-g, --get-url simulate, quiet but print URL
|
||||
-e, --get-title simulate, quiet but print title
|
||||
--get-id simulate, quiet but print id
|
||||
--get-thumbnail simulate, quiet but print thumbnail URL
|
||||
--get-description simulate, quiet but print video description
|
||||
--get-filename simulate, quiet but print output filename
|
||||
@@ -115,7 +116,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-F, --list-formats list all available formats (currently youtube
|
||||
only)
|
||||
--write-sub write subtitle file (currently youtube only)
|
||||
--only-sub downloads only the subtitles (no video)
|
||||
--only-sub [deprecated] alias of --skip-download
|
||||
--all-subs downloads all the available subtitles of the
|
||||
video (currently youtube only)
|
||||
--list-subs lists all available subtitles for the video
|
||||
|
||||
@@ -12,6 +12,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.InfoExtractors import YoutubeIE
|
||||
from youtube_dl.utils import *
|
||||
from youtube_dl import FileDownloader
|
||||
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
@@ -24,7 +25,7 @@ proxy_handler = compat_urllib_request.ProxyHandler()
|
||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
compat_urllib_request.install_opener(opener)
|
||||
|
||||
class FakeDownloader(object):
|
||||
class FakeDownloader(FileDownloader):
|
||||
def __init__(self):
|
||||
self.result = []
|
||||
self.params = parameters
|
||||
|
||||
@@ -152,7 +152,8 @@
|
||||
"file": "20274954.flv",
|
||||
"md5": "088f151799e8f572f84eb62f17d73e5c",
|
||||
"info_dict": {
|
||||
"title": "Young Americans for Liberty February 7, 2012 2:28 AM"
|
||||
"title": "Young Americans for Liberty February 7, 2012 2:28 AM",
|
||||
"uploader": "Young Americans for Liberty"
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -401,7 +402,8 @@
|
||||
"md5":"cdeb30cdae1921719a3cbcab696ef53c",
|
||||
"info_dict": {
|
||||
"title":"youtube-dl test song \"'/\\ä↭"
|
||||
}
|
||||
},
|
||||
"skip": "There is a limit of 200 free downloads / month for the test song"
|
||||
},
|
||||
{
|
||||
"name": "RedTube",
|
||||
@@ -429,5 +431,56 @@
|
||||
"info_dict":{
|
||||
"title":"François Hollande \"Je crois que c'est clair\""
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Yahoo",
|
||||
"url": "http://screen.yahoo.com/obama-celebrates-iraq-victory-27592561.html",
|
||||
"file": "27592561.flv",
|
||||
"md5": "c6179bed843512823fd284fa2e7f012d",
|
||||
"info_dict": {
|
||||
"title": "Obama Celebrates Iraq Victory"
|
||||
},
|
||||
"skip": "Requires rtmpdump"
|
||||
},
|
||||
{
|
||||
"name": "Howcast",
|
||||
"url": "http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly",
|
||||
"file": "390161.mp4",
|
||||
"md5": "1d7ba54e2c9d7dc6935ef39e00529138",
|
||||
"info_dict":{
|
||||
"title":"How to Tie a Square Knot Properly",
|
||||
"description":"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot."
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Vine",
|
||||
"url": "https://vine.co/v/b9KOOWX7HUx",
|
||||
"file": "b9KOOWX7HUx.mp4",
|
||||
"md5": "2f36fed6235b16da96ce9b4dc890940d",
|
||||
"info_dict":{
|
||||
"title": "Chicken.",
|
||||
"uploader": "Jack Dorsey"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Flickr",
|
||||
"url": "http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/",
|
||||
"file": "5645318632.mp4",
|
||||
"md5": "6fdc01adbc89d72fc9c4f15b4a4ba87b",
|
||||
"info_dict":{
|
||||
"title": "Dark Hollow Waterfalls",
|
||||
"uploader_id": "forestwander-nature-pictures",
|
||||
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up."
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Teamcoco",
|
||||
"url": "http://teamcoco.com/video/louis-ck-interview-george-w-bush",
|
||||
"file": "19705.mp4",
|
||||
"md5": "27b6f7527da5acf534b15f21b032656e",
|
||||
"info_dict":{
|
||||
"title": "Louis C.K. Interview Pt. 1 11/3/11",
|
||||
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one."
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -54,6 +54,7 @@ class FileDownloader(object):
|
||||
quiet: Do not print messages to stdout.
|
||||
forceurl: Force printing final URL.
|
||||
forcetitle: Force printing title.
|
||||
forceid: Force printing ID.
|
||||
forcethumbnail: Force printing thumbnail URL.
|
||||
forcedescription: Force printing description.
|
||||
forcefilename: Force printing final filename.
|
||||
@@ -82,7 +83,6 @@ class FileDownloader(object):
|
||||
writeinfojson: Write the video description to a .info.json file
|
||||
writethumbnail: Write the thumbnail image to a file
|
||||
writesubtitles: Write the video subtitles to a file
|
||||
onlysubtitles: Downloads only the subtitles of the video
|
||||
allsubtitles: Downloads all the subtitles of the video
|
||||
listsubtitles: Lists all available subtitles for the video
|
||||
subtitlesformat: Subtitle format [sbv/srt] (default=srt)
|
||||
@@ -92,6 +92,7 @@ class FileDownloader(object):
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||
skip_download: Skip the actual download of the video file
|
||||
"""
|
||||
|
||||
params = None
|
||||
@@ -435,10 +436,11 @@ class FileDownloader(object):
|
||||
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||
return None
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None):
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
||||
'''
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
extra_info is a dict containing the extra values to add to each result
|
||||
'''
|
||||
|
||||
if ie_key:
|
||||
@@ -462,10 +464,14 @@ class FileDownloader(object):
|
||||
break
|
||||
if isinstance(ie_result, list):
|
||||
# Backwards compatibility: old IE result format
|
||||
for result in ie_result:
|
||||
result.update(extra_info)
|
||||
ie_result = {
|
||||
'_type': 'compat_list',
|
||||
'entries': ie_result,
|
||||
}
|
||||
else:
|
||||
ie_result.update(extra_info)
|
||||
if 'extractor' not in ie_result:
|
||||
ie_result['extractor'] = ie.IE_NAME
|
||||
return self.process_ie_result(ie_result, download=download)
|
||||
@@ -481,7 +487,7 @@ class FileDownloader(object):
|
||||
else:
|
||||
self.report_error(u'no suitable InfoExtractor: %s' % url)
|
||||
|
||||
def process_ie_result(self, ie_result, download=True):
|
||||
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||
"""
|
||||
Take the result of the ie(may be modified) and resolve all unresolved
|
||||
references (URLs, playlist items).
|
||||
@@ -500,7 +506,12 @@ class FileDownloader(object):
|
||||
self.process_info(ie_result)
|
||||
return ie_result
|
||||
elif result_type == 'url':
|
||||
return self.extract_info(ie_result['url'], download, ie_key=ie_result.get('ie_key'))
|
||||
# We have to add extra_info to the results because it may be
|
||||
# contained in a playlist
|
||||
return self.extract_info(ie_result['url'],
|
||||
download,
|
||||
ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info)
|
||||
elif result_type == 'playlist':
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||
@@ -524,9 +535,13 @@ class FileDownloader(object):
|
||||
|
||||
for i,entry in enumerate(entries,1):
|
||||
self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
|
||||
entry['playlist'] = playlist
|
||||
entry['playlist_index'] = i + playliststart
|
||||
entry_result = self.process_ie_result(entry, download=download)
|
||||
extra = {
|
||||
'playlist': playlist,
|
||||
'playlist_index': i + playliststart,
|
||||
}
|
||||
entry_result = self.process_ie_result(entry,
|
||||
download=download,
|
||||
extra_info=extra)
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
return ie_result
|
||||
@@ -574,6 +589,8 @@ class FileDownloader(object):
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
compat_print(info_dict['title'])
|
||||
if self.params.get('forceid', False):
|
||||
compat_print(info_dict['id'])
|
||||
if self.params.get('forceurl', False):
|
||||
compat_print(info_dict['url'])
|
||||
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
|
||||
@@ -594,7 +611,7 @@ class FileDownloader(object):
|
||||
|
||||
try:
|
||||
dn = os.path.dirname(encodeFilename(filename))
|
||||
if dn != '' and not os.path.exists(dn): # dn is already encoded
|
||||
if dn != '' and not os.path.exists(dn):
|
||||
os.makedirs(dn)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error(u'unable to create directory ' + compat_str(err))
|
||||
@@ -627,8 +644,6 @@ class FileDownloader(object):
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write subtitles file ' + descfn)
|
||||
return
|
||||
if self.params.get('onlysubtitles', False):
|
||||
return
|
||||
|
||||
if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
subtitles = info_dict['subtitles']
|
||||
@@ -646,8 +661,6 @@ class FileDownloader(object):
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write subtitles file ' + descfn)
|
||||
return
|
||||
if self.params.get('onlysubtitles', False):
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = filename + u'.info.json'
|
||||
@@ -735,7 +748,7 @@ class FileDownloader(object):
|
||||
except (IOError, OSError):
|
||||
self.report_warning(u'Unable to remove downloaded video file')
|
||||
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
@@ -750,12 +763,15 @@ class FileDownloader(object):
|
||||
# the connection was interrupted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
|
||||
if self.params.get('verbose', False): basic_args[1] = '-v'
|
||||
if player_url is not None:
|
||||
basic_args += ['-W', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
if play_path is not None:
|
||||
basic_args += ['-y', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
|
||||
if self.params.get('verbose', False):
|
||||
try:
|
||||
@@ -811,7 +827,8 @@ class FileDownloader(object):
|
||||
return self._download_with_rtmpdump(filename, url,
|
||||
info_dict.get('player_url', None),
|
||||
info_dict.get('page_url', None),
|
||||
info_dict.get('play_path', None))
|
||||
info_dict.get('play_path', None),
|
||||
info_dict.get('tc_url', None))
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
@@ -16,6 +16,9 @@ import xml.etree.ElementTree
|
||||
import random
|
||||
import math
|
||||
import operator
|
||||
import hashlib
|
||||
import binascii
|
||||
import urllib
|
||||
|
||||
from .utils import *
|
||||
|
||||
@@ -188,6 +191,45 @@ class InfoExtractor(object):
|
||||
video_info['title'] = playlist_title
|
||||
return video_info
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
    """
    Base class for paged search query extractors.

    Accepts pseudo-URLs of the form <_SEARCH_KEY><prefix>:<query>, where
    <prefix> is one of:
      - empty:              fetch a single result
      - a positive integer: fetch that many results (capped at _MAX_RESULTS)
      - "all":              fetch _MAX_RESULTS results
    Subclasses must define _SEARCH_KEY and _MAX_RESULTS and implement
    _get_n_results().
    """

    @classmethod
    def _make_valid_url(cls):
        """Return the regex that matches this extractor's search queries."""
        return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY

    @classmethod
    def suitable(cls, url):
        """Return True if url is a search query handled by this extractor."""
        return re.match(cls._make_valid_url(), url) is not None

    def _real_extract(self, query):
        """
        Parse the search pseudo-URL and delegate to _get_n_results().

        Raises ExtractorError if the query does not match the expected
        format or asks for a non-positive number of results.
        """
        mobj = re.match(self._make_valid_url(), query)
        if mobj is None:
            raise ExtractorError(u'Invalid search query "%s"' % query)

        prefix = mobj.group('prefix')
        query = mobj.group('query')

        if prefix == '':
            # No count given: only the first result is requested
            return self._get_n_results(query, 1)
        if prefix == 'all':
            return self._get_n_results(query, self._MAX_RESULTS)

        n = int(prefix)
        if n <= 0:
            # Defensive guard; the regex above only admits positive integers
            raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
        if n > self._MAX_RESULTS:
            # Clamp the request instead of failing, but tell the user about it
            self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
            n = self._MAX_RESULTS
        return self._get_n_results(query, n)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
|
||||
class YoutubeIE(InfoExtractor):
|
||||
"""Information extractor for youtube.com."""
|
||||
@@ -610,10 +652,13 @@ class YoutubeIE(InfoExtractor):
|
||||
self.report_rtmp_download()
|
||||
video_url_list = [(None, video_info['conn'][0])]
|
||||
elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
|
||||
url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
|
||||
url_data = [compat_parse_qs(uds) for uds in url_data_strs]
|
||||
url_data = [ud for ud in url_data if 'itag' in ud and 'url' in ud]
|
||||
url_map = dict((ud['itag'][0], ud['url'][0] + '&signature=' + ud['sig'][0]) for ud in url_data)
|
||||
url_map = {}
|
||||
for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
|
||||
url_data = compat_parse_qs(url_data_str)
|
||||
if 'itag' in url_data and 'url' in url_data:
|
||||
url = url_data['url'][0] + '&signature=' + url_data['sig'][0]
|
||||
if not 'ratebypass' in url: url += '&ratebypass=yes'
|
||||
url_map[url_data['itag'][0]] = url
|
||||
|
||||
format_limit = self._downloader.params.get('format_limit', None)
|
||||
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
|
||||
@@ -909,123 +954,72 @@ class PhotobucketIE(InfoExtractor):
|
||||
|
||||
|
||||
class YahooIE(InfoExtractor):
|
||||
"""Information extractor for video.yahoo.com."""
|
||||
"""Information extractor for screen.yahoo.com."""
|
||||
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
|
||||
|
||||
_WORKING = False
|
||||
# _VALID_URL matches all Yahoo! Video URLs
|
||||
# _VPAGE_URL matches only the extractable '/watch/' URLs
|
||||
_VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
|
||||
_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
|
||||
IE_NAME = u'video.yahoo'
|
||||
|
||||
def _real_extract(self, url, new_video=True):
|
||||
# Extract ID from URL
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage)
|
||||
|
||||
video_id = mobj.group(2)
|
||||
video_extension = 'flv'
|
||||
if m_id is None:
|
||||
# TODO: Check which url parameters are required
|
||||
info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
|
||||
webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage')
|
||||
info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
|
||||
<description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
|
||||
<media:pubStart><!\[CDATA\[(?P<date>.*?)\ .*\]\]></media:pubStart>.*
|
||||
<media:content\ medium="image"\ url="(?P<thumb>.*?)"\ name="LARGETHUMB"
|
||||
'''
|
||||
self.report_extraction(video_id)
|
||||
m_info = re.search(info_re, webpage, re.VERBOSE|re.DOTALL)
|
||||
if m_info is None:
|
||||
raise ExtractorError(u'Unable to extract video info')
|
||||
video_title = m_info.group('title')
|
||||
video_description = m_info.group('description')
|
||||
video_thumb = m_info.group('thumb')
|
||||
video_date = m_info.group('date')
|
||||
video_date = datetime.datetime.strptime(video_date, '%m/%d/%Y').strftime('%Y%m%d')
|
||||
|
||||
# TODO: Find a way to get mp4 videos
|
||||
rest_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;element=stream;outputformat=mrss;id=%s;lmsoverride=1;bw=375;dynamicstream=1;cb=83521105;tech=flv,mp4;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
|
||||
webpage = self._download_webpage(rest_url, video_id, u'Downloading video url webpage')
|
||||
m_rest = re.search(r'<media:content url="(?P<url>.*?)" path="(?P<path>.*?)"', webpage)
|
||||
video_url = m_rest.group('url')
|
||||
video_path = m_rest.group('path')
|
||||
if m_rest is None:
|
||||
raise ExtractorError(u'Unable to extract video url')
|
||||
|
||||
# Rewrite valid but non-extractable URLs as
|
||||
# extractable English language /watch/ URLs
|
||||
if re.match(self._VPAGE_URL, url) is None:
|
||||
request = compat_urllib_request.Request(url)
|
||||
try:
|
||||
webpage = compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err))
|
||||
|
||||
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract id field')
|
||||
yahoo_id = mobj.group(1)
|
||||
|
||||
mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract vid field')
|
||||
yahoo_vid = mobj.group(1)
|
||||
|
||||
url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
|
||||
return self._real_extract(url, new_video=False)
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = compat_urllib_request.Request(url)
|
||||
try:
|
||||
self.report_download_webpage(video_id)
|
||||
webpage = compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err))
|
||||
|
||||
# Extract uploader and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video title')
|
||||
video_title = mobj.group(1).decode('utf-8')
|
||||
|
||||
mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video uploader')
|
||||
video_uploader = mobj.group(1).decode('utf-8')
|
||||
|
||||
# Extract video thumbnail
|
||||
mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video thumbnail')
|
||||
video_thumbnail = mobj.group(1).decode('utf-8')
|
||||
|
||||
# Extract video description
|
||||
mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video description')
|
||||
video_description = mobj.group(1).decode('utf-8')
|
||||
if not video_description:
|
||||
video_description = 'No description available.'
|
||||
|
||||
# Extract video height and width
|
||||
mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video height')
|
||||
yv_video_height = mobj.group(1)
|
||||
|
||||
mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video width')
|
||||
yv_video_width = mobj.group(1)
|
||||
|
||||
# Retrieve video playlist to extract media URL
|
||||
# I'm not completely sure what all these options are, but we
|
||||
# seem to need most of them, otherwise the server sends a 401.
|
||||
yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
|
||||
yv_bitrate = '700' # according to Wikipedia this is hard-coded
|
||||
request = compat_urllib_request.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
|
||||
'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
|
||||
'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
|
||||
try:
|
||||
self.report_download_webpage(video_id)
|
||||
webpage = compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err))
|
||||
|
||||
# Extract media URL from playlist XML
|
||||
mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract media URL')
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
|
||||
video_url = unescapeHTML(video_url)
|
||||
|
||||
return [{
|
||||
'id': video_id.decode('utf-8'),
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': video_extension.decode('utf-8'),
|
||||
'thumbnail': video_thumbnail.decode('utf-8'),
|
||||
'description': video_description,
|
||||
}]
|
||||
else: # We have to use a different method if another id is defined
|
||||
long_id = m_id.group('new_id')
|
||||
info_url = 'http://video.query.yahoo.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.media.video.streams%20WHERE%20id%3D%22' + long_id + '%22%20AND%20format%3D%22mp4%2Cflv%22%20AND%20protocol%3D%22rtmp%2Chttp%22%20AND%20plrs%3D%2286Gj0vCaSzV_Iuf6hNylf2%22%20AND%20acctid%3D%22389%22%20AND%20plidl%3D%22%22%20AND%20pspid%3D%22792700001%22%20AND%20offnetwork%3D%22false%22%20AND%20site%3D%22ivy%22%20AND%20lang%3D%22en-US%22%20AND%20region%3D%22US%22%20AND%20override%3D%22none%22%3B&env=prod&format=json&callback=YUI.Env.JSONP.yui_3_8_1_1_1368368376830_335'
|
||||
webpage = self._download_webpage(info_url, video_id, u'Downloading info json')
|
||||
json_str = re.search(r'YUI.Env.JSONP.yui.*?\((.*?)\);', webpage).group(1)
|
||||
info = json.loads(json_str)
|
||||
res = info[u'query'][u'results'][u'mediaObj'][0]
|
||||
stream = res[u'streams'][0]
|
||||
video_path = stream[u'path']
|
||||
video_url = stream[u'host']
|
||||
meta = res[u'meta']
|
||||
video_title = meta[u'title']
|
||||
video_description = meta[u'description']
|
||||
video_thumb = meta[u'thumbnail']
|
||||
video_date = None # I can't find it
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'play_path': video_path,
|
||||
'title':video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumb,
|
||||
'upload_date': video_date,
|
||||
'ext': 'flv',
|
||||
}
|
||||
return info_dict
|
||||
|
||||
class VimeoIE(InfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
@@ -1313,6 +1307,8 @@ class GenericIE(InfoExtractor):
|
||||
opener.add_handler(handler())
|
||||
|
||||
response = opener.open(HeadRequest(url))
|
||||
if response is None:
|
||||
raise ExtractorError(u'Invalid URL protocol')
|
||||
new_url = response.geturl()
|
||||
|
||||
if url == new_url:
|
||||
@@ -1384,42 +1380,18 @@ class GenericIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
|
||||
class YoutubeSearchIE(InfoExtractor):
|
||||
class YoutubeSearchIE(SearchInfoExtractor):
|
||||
"""Information Extractor for YouTube search queries."""
|
||||
_VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
|
||||
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
|
||||
_max_youtube_results = 1000
|
||||
_MAX_RESULTS = 1000
|
||||
IE_NAME = u'youtube:search'
|
||||
_SEARCH_KEY = 'ytsearch'
|
||||
|
||||
def report_download_page(self, query, pagenum):
|
||||
"""Report attempt to download search page with given number."""
|
||||
query = query.decode(preferredencoding())
|
||||
self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
|
||||
|
||||
def _real_extract(self, query):
|
||||
mobj = re.match(self._VALID_URL, query)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid search query "%s"' % query)
|
||||
|
||||
prefix, query = query.split(':')
|
||||
prefix = prefix[8:]
|
||||
query = query.encode('utf-8')
|
||||
if prefix == '':
|
||||
return self._get_n_results(query, 1)
|
||||
elif prefix == 'all':
|
||||
self._get_n_results(query, self._max_youtube_results)
|
||||
else:
|
||||
try:
|
||||
n = int(prefix)
|
||||
if n <= 0:
|
||||
raise ExtractorError(u'Invalid download number %s for query "%s"' % (n, query))
|
||||
elif n > self._max_youtube_results:
|
||||
self._downloader.report_warning(u'ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
|
||||
n = self._max_youtube_results
|
||||
return self._get_n_results(query, n)
|
||||
except ValueError: # parsing prefix as integer fails
|
||||
return self._get_n_results(query, 1)
|
||||
|
||||
def _get_n_results(self, query, n):
|
||||
"""Get a specified number of results for a query"""
|
||||
|
||||
@@ -1449,36 +1421,18 @@ class YoutubeSearchIE(InfoExtractor):
|
||||
if len(video_ids) > n:
|
||||
video_ids = video_ids[:n]
|
||||
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
|
||||
return videos
|
||||
return self.playlist_result(videos, query)
|
||||
|
||||
|
||||
class GoogleSearchIE(InfoExtractor):
|
||||
class GoogleSearchIE(SearchInfoExtractor):
|
||||
"""Information Extractor for Google Video search queries."""
|
||||
_VALID_URL = r'gvsearch(?P<prefix>|\d+|all):(?P<query>[\s\S]+)'
|
||||
_MORE_PAGES_INDICATOR = r'class="pn" id="pnnext"'
|
||||
_max_google_results = 1000
|
||||
_MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
|
||||
_MAX_RESULTS = 1000
|
||||
IE_NAME = u'video.google:search'
|
||||
_SEARCH_KEY = 'gvsearch'
|
||||
|
||||
def _real_extract(self, query):
|
||||
mobj = re.match(self._VALID_URL, query)
|
||||
|
||||
prefix = mobj.group('prefix')
|
||||
query = mobj.group('query')
|
||||
if prefix == '':
|
||||
return self._download_n_results(query, 1)
|
||||
elif prefix == 'all':
|
||||
return self._download_n_results(query, self._max_google_results)
|
||||
else:
|
||||
n = int(prefix)
|
||||
if n <= 0:
|
||||
raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
|
||||
elif n > self._max_google_results:
|
||||
self._downloader.report_warning(u'gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
|
||||
n = self._max_google_results
|
||||
return self._download_n_results(query, n)
|
||||
|
||||
def _download_n_results(self, query, n):
|
||||
"""Downloads a specified number of results for a query"""
|
||||
def _get_n_results(self, query, n):
|
||||
"""Get a specified number of results for a query"""
|
||||
|
||||
res = {
|
||||
'_type': 'playlist',
|
||||
@@ -1487,7 +1441,7 @@ class GoogleSearchIE(InfoExtractor):
|
||||
}
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
result_url = u'http://video.google.com/videosearch?q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
|
||||
result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
|
||||
webpage = self._download_webpage(result_url, u'gvsearch:' + query,
|
||||
note='Downloading result page ' + str(pagenum))
|
||||
|
||||
@@ -1501,84 +1455,39 @@ class GoogleSearchIE(InfoExtractor):
|
||||
if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
|
||||
return res
|
||||
|
||||
class YahooSearchIE(InfoExtractor):
|
||||
class YahooSearchIE(SearchInfoExtractor):
|
||||
"""Information Extractor for Yahoo! Video search queries."""
|
||||
|
||||
_WORKING = False
|
||||
_VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
|
||||
_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
|
||||
_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
|
||||
_MORE_PAGES_INDICATOR = r'\s*Next'
|
||||
_max_yahoo_results = 1000
|
||||
IE_NAME = u'video.yahoo:search'
|
||||
_MAX_RESULTS = 1000
|
||||
IE_NAME = u'screen.yahoo:search'
|
||||
_SEARCH_KEY = 'yvsearch'
|
||||
|
||||
def report_download_page(self, query, pagenum):
|
||||
"""Report attempt to download playlist page with given number."""
|
||||
query = query.decode(preferredencoding())
|
||||
self.to_screen(u'query "%s": Downloading page %s' % (query, pagenum))
|
||||
def _get_n_results(self, query, n):
|
||||
"""Get a specified number of results for a query"""
|
||||
|
||||
def _real_extract(self, query):
|
||||
mobj = re.match(self._VALID_URL, query)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid search query "%s"' % query)
|
||||
res = {
|
||||
'_type': 'playlist',
|
||||
'id': query,
|
||||
'entries': []
|
||||
}
|
||||
for pagenum in itertools.count(0):
|
||||
result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
|
||||
webpage = self._download_webpage(result_url, query,
|
||||
note='Downloading results page '+str(pagenum+1))
|
||||
info = json.loads(webpage)
|
||||
m = info[u'm']
|
||||
results = info[u'results']
|
||||
|
||||
prefix, query = query.split(':')
|
||||
prefix = prefix[8:]
|
||||
query = query.encode('utf-8')
|
||||
if prefix == '':
|
||||
self._download_n_results(query, 1)
|
||||
return
|
||||
elif prefix == 'all':
|
||||
self._download_n_results(query, self._max_yahoo_results)
|
||||
return
|
||||
else:
|
||||
try:
|
||||
n = int(prefix)
|
||||
if n <= 0:
|
||||
raise ExtractorError(u'Invalid download number %s for query "%s"' % (n, query))
|
||||
elif n > self._max_yahoo_results:
|
||||
self._downloader.report_warning(u'yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
|
||||
n = self._max_yahoo_results
|
||||
self._download_n_results(query, n)
|
||||
return
|
||||
except ValueError: # parsing prefix as integer fails
|
||||
self._download_n_results(query, 1)
|
||||
return
|
||||
for (i, r) in enumerate(results):
|
||||
if (pagenum * 30) +i >= n:
|
||||
break
|
||||
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
|
||||
e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
|
||||
res['entries'].append(e)
|
||||
if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1 )):
|
||||
break
|
||||
|
||||
def _download_n_results(self, query, n):
|
||||
"""Downloads a specified number of results for a query"""
|
||||
|
||||
video_ids = []
|
||||
already_seen = set()
|
||||
pagenum = 1
|
||||
|
||||
while True:
|
||||
self.report_download_page(query, pagenum)
|
||||
result_url = self._TEMPLATE_URL % (compat_urllib_parse.quote_plus(query), pagenum)
|
||||
request = compat_urllib_request.Request(result_url)
|
||||
try:
|
||||
page = compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to download webpage: %s' % compat_str(err))
|
||||
|
||||
# Extract video identifiers
|
||||
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
|
||||
video_id = mobj.group(1)
|
||||
if video_id not in already_seen:
|
||||
video_ids.append(video_id)
|
||||
already_seen.add(video_id)
|
||||
if len(video_ids) == n:
|
||||
# Specified n videos reached
|
||||
for id in video_ids:
|
||||
self._downloader.download(['http://video.yahoo.com/watch/%s' % id])
|
||||
return
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
||||
for id in video_ids:
|
||||
self._downloader.download(['http://video.yahoo.com/watch/%s' % id])
|
||||
return
|
||||
|
||||
pagenum = pagenum + 1
|
||||
return res
|
||||
|
||||
|
||||
class YoutubePlaylistIE(InfoExtractor):
|
||||
@@ -2073,37 +1982,158 @@ class MyVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
|
||||
IE_NAME = u'myvideo'
|
||||
|
||||
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
|
||||
# Released into the Public Domain by Tristan Fischer on 2013-05-19
|
||||
# https://github.com/rg3/youtube-dl/pull/842
|
||||
def __rc4crypt(self,data, key):
|
||||
x = 0
|
||||
box = list(range(256))
|
||||
for i in list(range(256)):
|
||||
x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
|
||||
box[i], box[x] = box[x], box[i]
|
||||
x = 0
|
||||
y = 0
|
||||
out = ''
|
||||
for char in data:
|
||||
x = (x + 1) % 256
|
||||
y = (y + box[x]) % 256
|
||||
box[x], box[y] = box[y], box[x]
|
||||
out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
|
||||
return out
|
||||
|
||||
def __md5(self,s):
|
||||
return hashlib.md5(s).hexdigest().encode()
|
||||
|
||||
def _real_extract(self,url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError(u'invalid URL: %s' % url)
|
||||
|
||||
video_id = mobj.group(1)
|
||||
|
||||
GK = (
|
||||
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
|
||||
b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
|
||||
b'TnpsbA0KTVRkbU1tSTRNdz09'
|
||||
)
|
||||
|
||||
# Get video webpage
|
||||
webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
|
||||
if mobj is not None:
|
||||
self.report_extraction(video_id)
|
||||
video_url = mobj.group(1) + '.flv'
|
||||
|
||||
mobj = re.search('<title>([^<]+)</title>', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract title')
|
||||
video_title = mobj.group(1)
|
||||
|
||||
mobj = re.search('[.](.+?)$', video_url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract extention')
|
||||
video_ext = mobj.group(1)
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': u'flv',
|
||||
}]
|
||||
|
||||
# try encxml
|
||||
mobj = re.search('var flashvars={(.+?)}', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video')
|
||||
|
||||
params = {}
|
||||
encxml = ''
|
||||
sec = mobj.group(1)
|
||||
for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
|
||||
if not a == '_encxml':
|
||||
params[a] = b
|
||||
else:
|
||||
encxml = compat_urllib_parse.unquote(b)
|
||||
if not params.get('domain'):
|
||||
params['domain'] = 'www.myvideo.de'
|
||||
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
|
||||
if 'flash_playertype=MTV' in xmldata_url:
|
||||
self._downloader.report_warning(u'avoiding MTV player')
|
||||
xmldata_url = (
|
||||
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
|
||||
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
|
||||
) % video_id
|
||||
|
||||
# get enc data
|
||||
enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
|
||||
enc_data_b = binascii.unhexlify(enc_data)
|
||||
sk = self.__md5(
|
||||
base64.b64decode(base64.b64decode(GK)) +
|
||||
self.__md5(
|
||||
str(video_id).encode('utf-8')
|
||||
)
|
||||
)
|
||||
dec_data = self.__rc4crypt(enc_data_b, sk)
|
||||
|
||||
# extracting infos
|
||||
self.report_extraction(video_id)
|
||||
mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/.*?\.jpg\'',
|
||||
webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract media URL')
|
||||
video_url = mobj.group(1) + ('/%s.flv' % video_id)
|
||||
|
||||
mobj = re.search('<title>([^<]+)</title>', webpage)
|
||||
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract title')
|
||||
raise ExtractorError(u'unable to extract rtmpurl')
|
||||
video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1))
|
||||
if 'myvideo2flash' in video_rtmpurl:
|
||||
self._downloader.report_warning(u'forcing RTMPT ...')
|
||||
video_rtmpurl = video_rtmpurl.replace('rtmpe://', 'rtmpt://')
|
||||
|
||||
# extract non rtmp videos
|
||||
if (video_rtmpurl is None) or (video_rtmpurl == ''):
|
||||
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'unable to extract url')
|
||||
video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
|
||||
|
||||
mobj = re.search('source=\'(.*?)\'', dec_data)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'unable to extract swfobj')
|
||||
video_file = compat_urllib_parse.unquote(mobj.group(1))
|
||||
|
||||
if not video_file.endswith('f4m'):
|
||||
ppath, prefix = video_file.split('.')
|
||||
video_playpath = '%s:%s' % (prefix, ppath)
|
||||
video_hls_playlist = ''
|
||||
else:
|
||||
video_playpath = ''
|
||||
video_hls_playlist = (
|
||||
video_filepath + video_file
|
||||
).replace('.f4m', '.m3u8')
|
||||
|
||||
mobj = re.search('swfobject.embedSWF\(\'(.+?)\'', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'unable to extract swfobj')
|
||||
video_swfobj = compat_urllib_parse.unquote(mobj.group(1))
|
||||
|
||||
mobj = re.search("<h1(?: class='globalHd')?>(.*?)</h1>", webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'unable to extract title')
|
||||
video_title = mobj.group(1)
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': u'flv',
|
||||
'id': video_id,
|
||||
'url': video_rtmpurl,
|
||||
'tc_url': video_rtmpurl,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': u'flv',
|
||||
'play_path': video_playpath,
|
||||
'video_file': video_file,
|
||||
'video_hls_playlist': video_hls_playlist,
|
||||
'player_url': video_swfobj,
|
||||
}]
|
||||
|
||||
class ComedyCentralIE(InfoExtractor):
|
||||
@@ -3395,18 +3425,26 @@ class UstreamIE(InfoExtractor):
|
||||
video_id = m.group('videoID')
|
||||
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
m = re.search(r'data-title="(?P<title>.+)"',webpage)
|
||||
title = m.group('title')
|
||||
m = re.search(r'<a class="state" data-content-type="channel" data-content-id="(?P<uploader>\d+)"',webpage)
|
||||
uploader = m.group('uploader')
|
||||
self.report_extraction(video_id)
|
||||
try:
|
||||
m = re.search(r'data-title="(?P<title>.+)"',webpage)
|
||||
title = m.group('title')
|
||||
m = re.search(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
||||
webpage, re.DOTALL)
|
||||
uploader = unescapeHTML(m.group('uploader').strip())
|
||||
m = re.search(r'<link rel="image_src" href="(?P<thumb>.*?)"', webpage)
|
||||
thumb = m.group('thumb')
|
||||
except AttributeError:
|
||||
raise ExtractorError(u'Unable to extract info')
|
||||
info = {
|
||||
'id':video_id,
|
||||
'url':video_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
'uploader': uploader
|
||||
'uploader': uploader,
|
||||
'thumbnail': thumb,
|
||||
}
|
||||
return [info]
|
||||
return info
|
||||
|
||||
class WorldStarHipHopIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
|
||||
@@ -4028,7 +4066,7 @@ class TumblrIE(InfoExtractor):
|
||||
re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
|
||||
video = re.search(re_video, webpage)
|
||||
if video is None:
|
||||
self.to_screen("No video founded")
|
||||
self.to_screen("No video found")
|
||||
return []
|
||||
video_url = video.group('video_url')
|
||||
ext = video.group('ext')
|
||||
@@ -4127,7 +4165,7 @@ class RedTubeIE(InfoExtractor):
|
||||
|
||||
class InaIE(InfoExtractor):
|
||||
"""Information Extractor for Ina.fr"""
|
||||
_VALID_URL = r'(?:http://)?(?:www.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
|
||||
|
||||
def _real_extract(self,url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -4154,6 +4192,201 @@ class InaIE(InfoExtractor):
|
||||
'title': video_title,
|
||||
}]
|
||||
|
||||
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        """Extract the mp4 URL and metadata of a single Howcast video.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL or if the
        video URL, title or thumbnail cannot be found in the page.  A missing
        description only produces a warning.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Without this guard a non-matching URL crashes with AttributeError
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # Always fetch the canonical page, whatever form the input URL had
        webpage_url = 'http://www.howcast.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        mobj = re.search(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video URL')
        video_url = mobj.group(1)

        # The meta content may be wrapped in double or single quotes;
        # exactly one of the two groups is set.
        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
        if mobj is None:
            # The description is optional: warn and carry on
            self._downloader.report_warning(u'unable to extract description')
            video_description = None
        else:
            video_description = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta content=\'(.+?)\' property=\'og:image\'', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
        }]
|
||||
|
||||
class VineIE(InfoExtractor):
    """Information Extractor for Vine.co"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        """Extract the stream URL and metadata of a single Vine video.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL or if the
        video URL, title, thumbnail or uploader cannot be found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Without this guard a non-matching URL crashes with AttributeError
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # Always fetch the canonical https page, whatever form the input URL had
        webpage_url = 'https://vine.co/v/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        mobj = re.search(r'<meta property="twitter:player:stream" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video URL')
        video_url = mobj.group(1)

        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1)

        # Group 1 is the image URL; an optional query string lands in group 2
        # and is dropped.
        mobj = re.search(r'<meta property="og:image" content="(.+?)(\?.*?)?"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1)

        # DOTALL lets the match run across the line breaks between the
        # user <div> and its <h2>.
        mobj = re.search(r'<div class="user">.*?<h2>(.+?)</h2>', webpage, re.DOTALL)
        if mobj is None:
            raise ExtractorError(u'Unable to extract uploader')
        uploader = mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'uploader': uploader,
        }]
|
||||
|
||||
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        """Extract the stream URL and metadata of a single Flickr video.

        Three requests are made: the photo page (for the secret and the
        metadata), a first XML document (for the node id) and a second XML
        playlist (for the stream location).

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL or if the
        secret, node id, video URL, title or thumbnail cannot be found.  A
        missing description only produces a warning.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Without this guard a non-matching URL crashes with AttributeError
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_uploader_id = mobj.group('uploader_id')
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        # The secret is required by both XML requests below
        mobj = re.search(r"photo_secret: '(\w+)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video secret')
        secret = mobj.group(1)

        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')

        mobj = re.search(r'<Item id="id">(\d+-\d+)</Item>', first_xml)
        if mobj is None:
            raise ExtractorError(u'Unable to extract node_id')
        node_id = mobj.group(1)

        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        # The final URL is APP followed by the HTML-unescaped FULLPATH
        mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        video_url = mobj.group(1) + unescapeHTML(mobj.group(2))

        # Meta content may be wrapped in double or single quotes; exactly one
        # of the two groups is set.
        mobj = re.search(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
        if mobj is None:
            # The description is optional: warn and carry on
            self._downloader.report_warning(u'unable to extract description')
            video_description = None
        else:
            video_description = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1) or mobj.group(2)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'uploader_id': video_uploader_id,
        }]
|
||||
|
||||
class TeamcocoIE(InfoExtractor):
    """Information Extractor for teamcoco.com videos"""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """Extract the high-quality file URL and metadata of a Teamcoco video.

        The numeric video id is read from the page and then used to fetch
        the XML document that lists the media files.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL or if the
        video id, title, thumbnail, description or video URL cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        # The numeric id is not known yet, so the URL title labels the download
        webpage = self._download_webpage(url, url_title)

        mobj = re.search(r'<article class="video" data-id="(\d+?)"', webpage)
        if mobj is None:
            # Without this guard a page with no video article crashes with
            # AttributeError instead of a readable extractor error.
            raise ExtractorError(u'Unable to extract video id')
        video_id = mobj.group(1)

        self.report_extraction(video_id)

        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1)

        mobj = re.search(r'<meta property="og:image" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1)

        # '.*?' (not '.+?') so that an empty description is still accepted
        mobj = re.search(r'<meta property="og:description" content="(.*?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract description')
        description = mobj.group(1)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
        # Only the <file type="high"> entry is used
        mobj = re.search(r'<file type="high".*?>(.*?)</file>', data)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        video_url = mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'description': description,
        }]
|
||||
|
||||
def gen_extractors():
|
||||
""" Return a list of an instance of every supported extractor.
|
||||
The order does matter; the first extractor matched is the one handling the URL.
|
||||
@@ -4211,6 +4444,10 @@ def gen_extractors():
|
||||
BandcampIE(),
|
||||
RedTubeIE(),
|
||||
InaIE(),
|
||||
HowcastIE(),
|
||||
VineIE(),
|
||||
FlickrIE(),
|
||||
TeamcocoIE(),
|
||||
GenericIE()
|
||||
]
|
||||
|
||||
|
||||
@@ -85,8 +85,9 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout,stderr = p.communicate()
|
||||
if p.returncode != 0:
|
||||
stderr = stderr.decode('utf-8', 'replace')
|
||||
msg = stderr.strip().split('\n')[-1]
|
||||
raise FFmpegPostProcessorError(msg.decode('utf-8', 'replace'))
|
||||
raise FFmpegPostProcessorError(msg)
|
||||
|
||||
def _ffmpeg_filename_argument(self, fn):
|
||||
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
|
||||
|
||||
@@ -26,6 +26,8 @@ __authors__ = (
|
||||
'Osama Khalid',
|
||||
'Michael Walter',
|
||||
'M. Yasoob Ullah Khalid',
|
||||
'Julien Fraichard',
|
||||
'Johny Mo Swag',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@@ -189,8 +191,8 @@ def parseOpts(overrideArguments=None):
|
||||
action='store_true', dest='writesubtitles',
|
||||
help='write subtitle file (currently youtube only)', default=False)
|
||||
video_format.add_option('--only-sub',
|
||||
action='store_true', dest='onlysubtitles',
|
||||
help='downloads only the subtitles (no video)', default=False)
|
||||
action='store_true', dest='skip_download',
|
||||
help='[deprecated] alias of --skip-download', default=False)
|
||||
video_format.add_option('--all-subs',
|
||||
action='store_true', dest='allsubtitles',
|
||||
help='downloads all the available subtitles of the video (currently youtube only)', default=False)
|
||||
@@ -214,6 +216,8 @@ def parseOpts(overrideArguments=None):
|
||||
action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
|
||||
verbosity.add_option('-e', '--get-title',
|
||||
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
|
||||
verbosity.add_option('--get-id',
|
||||
action='store_true', dest='getid', help='simulate, quiet but print id', default=False)
|
||||
verbosity.add_option('--get-thumbnail',
|
||||
action='store_true', dest='getthumbnail',
|
||||
help='simulate, quiet but print thumbnail URL', default=False)
|
||||
@@ -493,15 +497,16 @@ def _real_main(argv=None):
|
||||
'usenetrc': opts.usenetrc,
|
||||
'username': opts.username,
|
||||
'password': opts.password,
|
||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
'forceid': opts.getid,
|
||||
'forcethumbnail': opts.getthumbnail,
|
||||
'forcedescription': opts.getdescription,
|
||||
'forcefilename': opts.getfilename,
|
||||
'forceformat': opts.getformat,
|
||||
'simulate': opts.simulate,
|
||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'format': opts.format,
|
||||
'format_limit': opts.format_limit,
|
||||
'listformats': opts.listformats,
|
||||
@@ -527,7 +532,6 @@ def _real_main(argv=None):
|
||||
'writeinfojson': opts.writeinfojson,
|
||||
'writethumbnail': opts.writethumbnail,
|
||||
'writesubtitles': opts.writesubtitles,
|
||||
'onlysubtitles': opts.onlysubtitles,
|
||||
'allsubtitles': opts.allsubtitles,
|
||||
'listsubtitles': opts.listsubtitles,
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import errno
|
||||
import gzip
|
||||
import io
|
||||
import json
|
||||
@@ -149,6 +150,10 @@ try:
|
||||
except NameError:
|
||||
compat_chr = chr
|
||||
|
||||
def compat_ord(c):
    """Return the integer code of c on both Python 2 and Python 3.

    Iterating over a bytes object yields ints on Python 3 but one-character
    strings on Python 2.  An int (or int subclass) is returned unchanged;
    anything else is passed to ord().
    """
    # isinstance rather than an exact type comparison, so int subclasses
    # are accepted as well.
    if isinstance(c, int):
        return c
    return ord(c)
|
||||
|
||||
std_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
|
||||
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
||||
@@ -334,12 +339,20 @@ def sanitize_open(filename, open_mode):
|
||||
stream = open(encodeFilename(filename), open_mode)
|
||||
return (stream, filename)
|
||||
except (IOError, OSError) as err:
|
||||
# In case of error, try to remove win32 forbidden chars
|
||||
filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)
|
||||
if err.errno in (errno.EACCES,):
|
||||
raise
|
||||
|
||||
# An exception here should be caught in the caller
|
||||
stream = open(encodeFilename(filename), open_mode)
|
||||
return (stream, filename)
|
||||
# In case of error, try to remove win32 forbidden chars
|
||||
alt_filename = os.path.join(
|
||||
re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
|
||||
for path_part in os.path.split(filename)
|
||||
)
|
||||
if alt_filename == filename:
|
||||
raise
|
||||
else:
|
||||
# An exception here should be caught in the caller
|
||||
stream = open(encodeFilename(filename), open_mode)
|
||||
return (stream, alt_filename)
|
||||
|
||||
|
||||
def timeconvert(timestr):
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.05.07'
|
||||
__version__ = '2013.05.23'
|
||||
|
||||
Reference in New Issue
Block a user