mirror of
https://source.netsyms.com/Mirrors/youtube-dl
synced 2026-03-30 23:15:45 +00:00
Compare commits
28 Commits
2014.11.27
...
2014.12.01
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
df015c69ea | ||
|
|
1434bffa1f | ||
|
|
94aa25b995 | ||
|
|
d128cfe393 | ||
|
|
954f36f890 | ||
|
|
19e92770c9 | ||
|
|
95c673a148 | ||
|
|
a196a53265 | ||
|
|
3266f0c68e | ||
|
|
1940fadd53 | ||
|
|
03fd72d996 | ||
|
|
f2b44a2513 | ||
|
|
c522adb1f0 | ||
|
|
7160532d41 | ||
|
|
4e62ebe250 | ||
|
|
4472f84f0c | ||
|
|
b766eb2707 | ||
|
|
10a404c335 | ||
|
|
c056efa2e3 | ||
|
|
283ac8d592 | ||
|
|
313d4572ce | ||
|
|
42939b6129 | ||
|
|
37ea8164d3 | ||
|
|
8c810a7db3 | ||
|
|
248a0b890f | ||
|
|
96b7c7fe3f | ||
|
|
e987e91fcc | ||
|
|
cb6444e197 |
1
AUTHORS
1
AUTHORS
@@ -88,3 +88,4 @@ Dao Hoang Son
|
||||
Oskar Jauch
|
||||
Matthew Rayfield
|
||||
t0mm0
|
||||
Tithen-Firion
|
||||
|
||||
@@ -238,7 +238,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
|
||||
@@ -242,7 +242,7 @@ from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .musicvault import MusicVaultIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .myspace import MySpaceIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .naver import NaverIE
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
@@ -55,7 +57,22 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||
'info_dict': {
|
||||
'id': 'b03k3pb7',
|
||||
'ext': 'flv',
|
||||
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
|
||||
'description': '2. Invasion',
|
||||
'duration': 3600,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
},
|
||||
]
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
@@ -102,6 +119,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||
|
||||
# Return the list of <media> children of a media-selection XML element.
# If the element has a direct <error> child (mediaselection namespace), raise
# ExtractorError(expected=True) whose message carries self.IE_NAME and the
# error element's "id" attribute instead of returning anything.
# media_selection: presumably an xml.etree.ElementTree element (only .find()
# and .findall() are used on it) -- TODO confirm against callers.
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
||||
if error is not None:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
|
||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||
|
||||
def _extract_connections(self, media):
|
||||
@@ -158,54 +179,73 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
subtitles[lang] = srt
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
group_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
if re.search(r'id="emp-error" class="notinuk">', webpage):
|
||||
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
expected=True)
|
||||
|
||||
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
|
||||
'Downloading playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % group_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % group_id
|
||||
# Download the media-selection XML for programme_id and return a
# (formats, subtitles) pair built from its <media> entries: 'audio' and 'video'
# entries extend the formats list, a 'captions' entry sets subtitles (None if
# no captions entry is seen).
# NOTE(review): this span is a rendered diff with the +/- markers stripped.
# The lines that reference `group_id`, `reason`, `msg`, `playlist` and `item`
# use names this method never defines, so they appear to be removed lines of
# the previous _real_extract rather than part of this method -- confirm
# against the repository.
def _download_media_selector(self, programme_id):
|
||||
try:
|
||||
media_selection = self._download_xml(
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
# A 403 response is not treated as fatal: its body is parsed as XML so that
# _extract_medias() can inspect it; any other failure is re-raised below.
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
# NOTE(review): if compat_HTTPError is the stdlib HTTPError, read() returns
# bytes on Python 3 and bytes has no .encode(), so this line would raise
# AttributeError there. Passing the raw bytes to fromstring(), or decoding
# instead of encoding, looks like the intent -- confirm and fix upstream.
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
raise
|
||||
|
||||
formats = []
|
||||
subtitles = None
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
# Dispatch each media entry on its "kind" attribute.
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self._extract_captions(media, programme_id)
|
||||
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
return formats, subtitles
|
||||
|
||||
media_selection = self._download_xml(
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
programme_id, 'Downloading media selection XML')
|
||||
def _real_extract(self, url):
|
||||
group_id = self._match_id(url)
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self._extract_captions(media, programme_id)
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
|
||||
if programme_id:
|
||||
player = self._download_json(
|
||||
'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
|
||||
group_id)['jsConf']['player']
|
||||
title = player['title']
|
||||
description = player['subtitle']
|
||||
duration = player['duration']
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
else:
|
||||
playlist = self._download_xml(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
|
||||
group_id, 'Downloading playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % group_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % group_id
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % group_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(programme_id, subtitles)
|
||||
|
||||
@@ -64,6 +64,20 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
'uploader': 'redvsblue',
|
||||
'uploader_id': '792887',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://blip.tv/play/gbk766dkj4Yn',
|
||||
'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
|
||||
'info_dict': {
|
||||
'id': '1749452',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20090208',
|
||||
'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
|
||||
'title': 'Nostalgia Critic: Transformers',
|
||||
'timestamp': 1234068723,
|
||||
'uploader': 'NostalgiaCritic',
|
||||
'uploader_id': '246467',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -74,11 +88,13 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
# See https://github.com/rg3/youtube-dl/issues/857 and
|
||||
# https://github.com/rg3/youtube-dl/issues/4197
|
||||
if lookup_id:
|
||||
info_page = self._download_webpage(
|
||||
'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
|
||||
video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
urlh = self._request_webpage(
|
||||
'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
|
||||
url = compat_urlparse.urlparse(urlh.geturl())
|
||||
qs = compat_urlparse.parse_qs(url.query)
|
||||
mobj = re.match(self._VALID_URL, qs['file'][0])
|
||||
|
||||
video_id = mobj.group('id')
|
||||
|
||||
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
|
||||
|
||||
@@ -114,7 +130,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
msg = self._download_webpage(
|
||||
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
|
||||
video_id, 'Resolving URL for %s' % role)
|
||||
real_url = compat_urlparse.parse_qs(msg)['message'][0]
|
||||
real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
|
||||
|
||||
media_type = media_content.get('type')
|
||||
if media_type == 'text/srt' or url.endswith('.srt'):
|
||||
|
||||
@@ -13,6 +13,7 @@ import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -817,6 +818,11 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(msg)
|
||||
return res
|
||||
|
||||
# Build a cookie for `domain` with the given name and value and add it to the
# downloader's cookie jar (self._downloader.cookiejar).
# expire_time is passed through as the cookie's expiry; None is the default.
# NOTE(review): assuming compat_cookiejar.Cookie is the stdlib cookie class,
# the positional arguments are version=0, name, value, port=None,
# port_specified=None, domain, domain_specified=None, domain_initial_dot=None,
# path='/', path_specified=True, secure=False, expires=expire_time, discard='',
# comment=None, comment_url=None, rest=None -- confirm against the compat module.
def _set_cookie(self, domain, name, value, expire_time=None):
|
||||
cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
|
||||
None, '/', True, False, expire_time, '', None, None, None)
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -7,6 +8,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class MySpaceIE(InfoExtractor):
|
||||
@@ -14,33 +16,58 @@ class MySpaceIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
|
||||
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
|
||||
'info_dict': {
|
||||
'id': '100008689',
|
||||
'id': '109594919',
|
||||
'ext': 'flv',
|
||||
'title': 'Viva La Vida',
|
||||
'description': 'The official Viva La Vida video, directed by Hype Williams',
|
||||
'uploader': 'Coldplay',
|
||||
'uploader_id': 'coldplay',
|
||||
'title': 'Little Big Town',
|
||||
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
|
||||
'uploader': 'Five Minutes to the Stage',
|
||||
'uploader_id': 'fiveminutestothestage',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# song
|
||||
# songs
|
||||
{
|
||||
'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
|
||||
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
|
||||
'info_dict': {
|
||||
'id': '39008454',
|
||||
'id': '93388656',
|
||||
'ext': 'flv',
|
||||
'title': 'Darkness In My Heart',
|
||||
'uploader_id': 'spiderbags',
|
||||
'title': 'Of weakened soul...',
|
||||
'uploader': 'Killsorrow',
|
||||
'uploader_id': 'killsorrow',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'add_ie': ['Vevo'],
|
||||
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
|
||||
'info_dict': {
|
||||
'id': 'USZM20600099',
|
||||
'ext': 'mp4',
|
||||
'title': 'Animal I Have Become',
|
||||
'uploader': 'Three Days Grace',
|
||||
'timestamp': int,
|
||||
'upload_date': '20060502',
|
||||
},
|
||||
'skip': 'VEVO is only available in some countries',
|
||||
}, {
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
|
||||
'info_dict': {
|
||||
'id': 'ypWvQgnJrSU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starset - First Light',
|
||||
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
|
||||
'uploader': 'Jacob Soren',
|
||||
'uploader_id': 'SorenPromotions',
|
||||
'upload_date': '20140725',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
@@ -48,16 +75,40 @@ class MySpaceIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_url = self._search_regex(
|
||||
r'playerSwf":"([^"?]*)', webpage, 'player URL')
|
||||
|
||||
if mobj.group('mediatype').startswith('music/song'):
|
||||
# songs don't store any useful info in the 'context' variable
|
||||
song_data = self._search_regex(
|
||||
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
|
||||
webpage, 'song_data', default=None, group=0)
|
||||
if song_data is None:
|
||||
# some songs in an album are not playable
|
||||
self.report_warning(
|
||||
'%s: No downloadable song on this page' % video_id)
|
||||
return
|
||||
def search_data(name):
|
||||
return self._search_regex(
|
||||
r'data-%s="(.*?)"' % name, webpage, name)
|
||||
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
|
||||
song_data, name, default='', group='data')
|
||||
streamUrl = search_data('stream-url')
|
||||
if not streamUrl:
|
||||
vevo_id = search_data('vevo-id')
|
||||
youtube_id = search_data('youtube-id')
|
||||
if vevo_id:
|
||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
elif youtube_id:
|
||||
self.to_screen('Youtube video detected: %s' % youtube_id)
|
||||
return self.url_result(youtube_id, ie='Youtube')
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Found song but don\'t know how to download it')
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'uploader': search_data('artist-name'),
|
||||
'uploader_id': search_data('artist-username'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
@@ -79,6 +130,50 @@ class MySpaceIE(InfoExtractor):
|
||||
info.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'ext': 'flv',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
# Playlist extractor for MySpace album pages: it finds the song URLs listed on
# an album page and returns them as playlist entries handled by MySpaceIE.
class MySpaceAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'MySpace:album'
|
||||
# 'title' captures the album slug including its trailing hyphen; 'id' is the
# run of digits that ends the URL.
_VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
|
||||
'info_dict': {
|
||||
'title': 'Transmissions',
|
||||
'id': '19455773',
|
||||
},
|
||||
'playlist_count': 14,
|
||||
'skip': 'this album is only available in some countries',
|
||||
}, {
|
||||
'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
|
||||
'info_dict': {
|
||||
'title': 'The Demo',
|
||||
'id': '18596029',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
}]
|
||||
|
||||
# Download the album page and return a playlist dict ('_type': 'playlist')
# with one url_result entry per song found.
# Raises ExtractorError(expected=True) when the page lists no songs.
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
# Slug (with its trailing hyphen) + numeric id, e.g. 'the-demo-18596029'.
display_id = mobj.group('title') + playlist_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
# Every value following a literal '"music:song" content=' in the page;
# presumably one meta tag per track -- TODO confirm against a live page.
tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
|
||||
if not tracks_paths:
|
||||
raise ExtractorError(
|
||||
'%s: No songs found, try using proxy' % display_id,
|
||||
expected=True)
|
||||
# Each track is delegated to MySpaceIE rather than extracted here.
entries = [
|
||||
self.url_result(t_path, ie=MySpaceIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'display_id': display_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ class NocoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'http://noco.tv/do.php'
|
||||
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
|
||||
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
@@ -60,10 +61,12 @@ class NocoIE(InfoExtractor):
|
||||
if 'erreur' in login:
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id, note):
|
||||
def _call_api(self, path, video_id, note, sub_lang=None):
|
||||
ts = compat_str(int(time.time() * 1000))
|
||||
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
|
||||
url = self._API_URL_TEMPLATE % (path, ts, tk)
|
||||
if sub_lang:
|
||||
url += self._SUB_LANG_TEMPLATE % sub_lang
|
||||
|
||||
resp = self._download_json(url, video_id, note)
|
||||
|
||||
@@ -91,31 +94,34 @@ class NocoIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
|
||||
for lang, lang_dict in medias['fr']['video_list'].items():
|
||||
for format_id, fmt in lang_dict['quality_list'].items():
|
||||
format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
|
||||
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||
video_id, 'Downloading %s video JSON' % format_id)
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||
video_id, 'Downloading %s video JSON' % format_id_extended,
|
||||
lang if lang != 'none' else None)
|
||||
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id,
|
||||
'width': fmt['res_width'],
|
||||
'height': fmt['res_lines'],
|
||||
'abr': fmt['audiobitrate'],
|
||||
'vbr': fmt['videobitrate'],
|
||||
'filesize': fmt['filesize'],
|
||||
'format_note': qualities[format_id]['quality_name'],
|
||||
'preference': qualities[format_id]['priority'],
|
||||
})
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id_extended,
|
||||
'width': fmt['res_width'],
|
||||
'height': fmt['res_lines'],
|
||||
'abr': fmt['audiobitrate'],
|
||||
'vbr': fmt['videobitrate'],
|
||||
'filesize': fmt['filesize'],
|
||||
'format_note': qualities[format_id]['quality_name'],
|
||||
'preference': qualities[format_id]['priority'],
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
@@ -28,6 +30,11 @@ class PlayvidIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_error = re.search(
|
||||
r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
|
||||
if m_error:
|
||||
raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
|
||||
|
||||
video_title = None
|
||||
duration = None
|
||||
video_thumbnail = None
|
||||
|
||||
@@ -39,7 +39,7 @@ class SlideshareIE(InfoExtractor):
|
||||
ext = info['jsplayer']['video_extension']
|
||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
|
||||
r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
|
||||
'description', fatal=False)
|
||||
|
||||
return {
|
||||
|
||||
@@ -19,6 +19,7 @@ class TuneInIE(InfoExtractor):
|
||||
|tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
|
||||
)
|
||||
'''
|
||||
_API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
|
||||
|
||||
_INFO_DICT = {
|
||||
'id': '34682',
|
||||
@@ -56,13 +57,10 @@ class TuneInIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
station_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, station_id, note='Downloading station webpage')
|
||||
station_info = self._download_json(
|
||||
self._API_URL_TEMPLATE.format(station_id),
|
||||
station_id, note='Downloading station JSON')
|
||||
|
||||
payload = self._html_search_regex(
|
||||
r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
|
||||
json_data = json.loads(payload)
|
||||
station_info = json_data['Station']['broadcast']
|
||||
title = station_info['Title']
|
||||
thumbnail = station_info.get('Logo')
|
||||
location = station_info.get('Location')
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
class VevoIE(InfoExtractor):
|
||||
"""
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE)
|
||||
(currently used by MTVIE and MySpaceIE)
|
||||
"""
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
|
||||
|
||||
@@ -50,7 +50,7 @@ class XMinusIE(InfoExtractor):
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
enc_token = self._html_search_regex(
|
||||
r'data-mt="(.*?)"', webpage, 'enc_token')
|
||||
r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
|
||||
token = ''.join(
|
||||
c if pos == 3 else compat_chr(compat_ord(c) - 1)
|
||||
for pos, c in enumerate(reversed(enc_token)))
|
||||
|
||||
@@ -7,6 +7,7 @@ import itertools
|
||||
import json
|
||||
import os.path
|
||||
import re
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
@@ -38,17 +39,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
"""Provide base functions for Youtube extractors"""
|
||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||
_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
|
||||
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
|
||||
_AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = False
|
||||
|
||||
# Force YouTube's interface language to English.
# NOTE(review): this span is a rendered diff showing two bodies. The
# `return bool(self._download_webpage(...))` statement downloads
# self._LANG_URL non-fatally and reports whether anything came back; the
# `self._set_cookie(...)` statement instead sets a PREF cookie containing
# hl=en on .youtube.com. By the hunk header (-38,17 +39,14) the cookie call
# appears to be the new body and the download the removed one -- confirm
# against the repository.
def _set_language(self):
|
||||
return bool(self._download_webpage(
|
||||
self._LANG_URL, None,
|
||||
note='Setting language', errnote='unable to set language',
|
||||
fatal=False))
|
||||
self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
|
||||
# YouTube sets the expire time to about two months
|
||||
# 60*24*3600 seconds = 60 days from now.
expire_time=time.time() + 60*24*3600)
|
||||
|
||||
def _login(self):
|
||||
"""
|
||||
@@ -176,30 +174,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _confirm_age(self):
|
||||
age_form = {
|
||||
'next_url': '/',
|
||||
'action_confirm': 'Confirm',
|
||||
}
|
||||
req = compat_urllib_request.Request(
|
||||
self._AGE_URL,
|
||||
compat_urllib_parse.urlencode(age_form).encode('ascii')
|
||||
)
|
||||
|
||||
self._download_webpage(
|
||||
req, None,
|
||||
note='Confirming age', errnote='Unable to confirm age',
|
||||
fatal=False)
|
||||
|
||||
# Initialization hook: does nothing when no downloader is attached, otherwise
# sets the language and attempts to log in.
# NOTE(review): this span is a rendered diff with old and new lines mixed.
# By the hunk header (-176,30 +174,12), the `_get_login_info()` check, the
# `if not self._set_language(): return` pair and the trailing
# `self._confirm_age()` call appear to be removed lines, leaving the
# unconditional `self._set_language()` followed by the login attempt --
# confirm against the repository.
def _real_initialize(self):
|
||||
if self._downloader is None:
|
||||
return
|
||||
if self._get_login_info()[0] is not None:
|
||||
if not self._set_language():
|
||||
return
|
||||
self._set_language()
|
||||
if not self._login():
|
||||
return
|
||||
self._confirm_age()
|
||||
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
@@ -305,6 +285,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
|
||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
||||
@@ -398,8 +379,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'IB3lcPjvWLA',
|
||||
'ext': 'm4a',
|
||||
'title': 'Afrojack - The Spark ft. Spree Wilson',
|
||||
'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
|
||||
'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
|
||||
'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
|
||||
'uploader': 'AfrojackVEVO',
|
||||
'uploader_id': 'AfrojackVEVO',
|
||||
'upload_date': '20131011',
|
||||
@@ -421,7 +402,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'title': 'Burning Everyone\'s Koran',
|
||||
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
|
||||
}
|
||||
}
|
||||
},
|
||||
# Normal age-gate video (No vevo, embed allowed)
|
||||
{
|
||||
'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
|
||||
'info_dict': {
|
||||
'id': 'HtVdAasjOgU',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
|
||||
'description': 'md5:eca57043abae25130f58f655ad9a7771',
|
||||
'uploader': 'The Witcher',
|
||||
'uploader_id': 'WitcherGame',
|
||||
'upload_date': '20140605',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@@ -684,16 +678,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
# Get video webpage
|
||||
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
|
||||
pref_cookies = [
|
||||
c for c in self._downloader.cookiejar
|
||||
if c.domain == '.youtube.com' and c.name == 'PREF']
|
||||
for pc in pref_cookies:
|
||||
if 'hl=' in pc.value:
|
||||
pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
|
||||
else:
|
||||
if pc.value:
|
||||
pc.value += '&'
|
||||
pc.value += 'hl=en'
|
||||
video_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Attempt to extract SWF player URL
|
||||
@@ -704,7 +688,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
player_url = None
|
||||
|
||||
# Get video info
|
||||
self.report_video_info_webpage_download(video_id)
|
||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||
age_gate = True
|
||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||
@@ -723,15 +706,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
else:
|
||||
age_gate = False
|
||||
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
||||
video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
||||
% (video_id, el_type))
|
||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||
note=False,
|
||||
errnote='unable to download video info webpage')
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
if 'token' in video_info:
|
||||
break
|
||||
try:
|
||||
# Try looking directly into the video webpage
|
||||
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
|
||||
if not mobj:
|
||||
raise ValueError('Could not find ytplayer.config') # caught below
|
||||
json_code = uppercase_escape(mobj.group(1))
|
||||
ytplayer_config = json.loads(json_code)
|
||||
args = ytplayer_config['args']
|
||||
# Convert to the same format returned by compat_parse_qs
|
||||
video_info = dict((k, [v]) for k, v in args.items())
|
||||
if 'url_encoded_fmt_stream_map' not in args:
|
||||
raise ValueError('No stream_map present') # caught below
|
||||
except ValueError:
|
||||
# We fall back to the get_video_info pages (used by the embed page)
|
||||
self.report_video_info_webpage_download(video_id)
|
||||
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
||||
video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
||||
% (video_id, el_type))
|
||||
video_info_webpage = self._download_webpage(video_info_url,
|
||||
video_id, note=False,
|
||||
errnote='unable to download video info webpage')
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
if 'token' in video_info:
|
||||
break
|
||||
if 'token' not in video_info:
|
||||
if 'reason' in video_info:
|
||||
raise ExtractorError(
|
||||
@@ -856,32 +854,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if self._downloader.params.get('writeannotations', False):
|
||||
video_annotations = self._extract_annotations(video_id)
|
||||
|
||||
# Decide which formats to download
|
||||
try:
|
||||
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
|
||||
if not mobj:
|
||||
raise ValueError('Could not find vevo ID')
|
||||
json_code = uppercase_escape(mobj.group(1))
|
||||
ytplayer_config = json.loads(json_code)
|
||||
args = ytplayer_config['args']
|
||||
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
||||
# this signatures are encrypted
|
||||
if 'url_encoded_fmt_stream_map' not in args:
|
||||
raise ValueError('No stream_map present') # caught below
|
||||
re_signature = re.compile(r'[&,]s=')
|
||||
m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
|
||||
if m_s is not None:
|
||||
self.to_screen('%s: Encrypted signatures detected.' % video_id)
|
||||
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
|
||||
m_s = re_signature.search(args.get('adaptive_fmts', ''))
|
||||
if m_s is not None:
|
||||
if 'adaptive_fmts' in video_info:
|
||||
video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
|
||||
else:
|
||||
video_info['adaptive_fmts'] = [args['adaptive_fmts']]
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
def _map_to_format_list(urlmap):
|
||||
formats = []
|
||||
for itag, video_real_url in urlmap.items():
|
||||
@@ -974,10 +946,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
# However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
|
||||
# Luckily, it seems, this case uses some kind of default signature (len == 86), so the
|
||||
# combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
|
||||
if age_gate:
|
||||
dash_manifest_url = video_info.get('dashmpd')[0]
|
||||
else:
|
||||
dash_manifest_url = ytplayer_config['args']['dashmpd']
|
||||
dash_manifest_url = video_info.get('dashmpd')[0]
|
||||
|
||||
def decrypt_sig(mobj):
|
||||
s = mobj.group(1)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2014.11.27'
|
||||
__version__ = '2014.12.01'
|
||||
|
||||
Reference in New Issue
Block a user