1
0
mirror of https://source.netsyms.com/Mirrors/youtube-dl synced 2026-03-30 23:15:45 +00:00

Compare commits

..

28 Commits

Author SHA1 Message Date
Philipp Hagemeister
df015c69ea release 2014.12.01 2014-12-01 17:28:34 +01:00
Naglis Jonaitis
1434bffa1f [tunein] Use station API 2014-12-01 18:10:15 +02:00
Jaime Marquínez Ferrándiz
94aa25b995 Credit @Tithen-Firion for the myspace changes (#4341) 2014-12-01 16:15:09 +01:00
Sergey M․
d128cfe393 [slideshare] Fix description extraction 2014-12-01 20:18:42 +06:00
Jaime Marquínez Ferrándiz
954f36f890 [myspace] Cleanup 2014-12-01 00:10:12 +01:00
Jaime Marquínez Ferrándiz
19e92770c9 [myspace] Replace removed test video and fix the others 2014-12-01 00:10:12 +01:00
Tithen-Firion
95c673a148 [myspace] Add extractor for albums 2014-12-01 00:10:12 +01:00
Tithen-Firion
a196a53265 [myspace] Update tests 2014-12-01 00:10:12 +01:00
Tithen-Firion
3266f0c68e [myspace] Redirect to other extractors
There are many songs just linked from Vevo/YouTube to MySpace.
Vevo example: https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041
YouTube example: https://myspace.com/starset2/music/song/first-light-95799905-106964426
2014-12-01 00:10:12 +01:00
Tithen-Firion
1940fadd53 [myspace] Handle non-playable songs
I'm adding this because sometimes there is a song page, but you cannot play it.
Example: https://myspace.com/starset2/music/song/let-it-die-maniac-agenda-remix-bonus-track-95799916-106964439
It will be useful for downloading whole album with songs like this.
2014-12-01 00:10:11 +01:00
Tithen-Firion
03fd72d996 [myspace] Add more data to info dict
`uploader` is an artist
`playlist` is an album
2014-12-01 00:10:11 +01:00
Tithen-Firion
f2b44a2513 [myspace] Use player_url for faster download
It keeps reconnecting without it. Download time decreased from 7+ minutes to 25 seconds for me.
2014-12-01 00:10:11 +01:00
Jaime Marquínez Ferrándiz
c522adb1f0 [youtube] Add a normal age-gate test video 2014-11-30 21:45:49 +01:00
Jaime Marquínez Ferrándiz
7160532d41 [youtube] Simplify code for getting the dash manifest url
video_info contains now the 'ytplayer.config.args' dictionary
2014-11-30 21:07:50 +01:00
Jaime Marquínez Ferrándiz
4e62ebe250 [youtube] Try to extract the video_info from the webpage before requesting the 'get_video_info' pages
The YouTube player doesn't seem to use them except for embedded videos, so we can skip a network request.
But they still provide better error mesagges (for removed videos for example).
2014-11-30 20:56:32 +01:00
Jaime Marquínez Ferrándiz
4472f84f0c [test/test_subtitles] Update checksum for vimeo subtitle file 2014-11-30 19:42:54 +01:00
Jaime Marquínez Ferrándiz
b766eb2707 [youtube] Update test 2014-11-30 19:18:39 +01:00
Jaime Marquínez Ferrándiz
10a404c335 [youtube] Add format 313 (fixes #4339) 2014-11-30 18:56:14 +01:00
Sergey M․
c056efa2e3 [bbccouk] Fix extraction (#4104, #4214) 2014-11-30 22:37:56 +06:00
Philipp Hagemeister
283ac8d592 Merge pull request #4338 from t0mm0/x-minus-fix
[xminus] update tkn extraction regex
2014-11-30 17:11:05 +01:00
t0mm0
313d4572ce [xminus] update tkn extraction regex 2014-11-30 16:04:04 +00:00
Jaime Marquínez Ferrándiz
42939b6129 [youtube] Use a cookie for seeting the language
This way, we don't have to do an aditional request
2014-11-30 00:03:59 +01:00
Jaime Marquínez Ferrándiz
37ea8164d3 [youtube] Don't confirm age when initializing
It seems that all the videos with age restriction use now the age gate method, which doesn't require any confirmation.
2014-11-29 23:46:39 +01:00
Jaime Marquínez Ferrándiz
8c810a7db3 Merge pull request #4333 from ymln/bliptv-fixes
[bliptv] Fix some videos not downloading
2014-11-29 20:20:45 +01:00
Yuriy Melnyk
248a0b890f [bliptv] Fix \n\n at the end of real_url
See https://github.com/rg3/youtube-dl/issues/3544#issuecomment-53166516
2014-11-29 19:17:56 +02:00
Yuriy Melnyk
96b7c7fe3f [bliptv] Fix resolution of lookup id in some videos
In some videos (for example, http://blip.tv/play/gbk766dkj4Yn) resolving
lookup id would fail, because page at
http://blip.tv/play/gbk766dkj4Yn.x?p=1 would have no "config.id" in
it. Fixed by requesting different URL and inspecting the URL which the
client is redirected to.
2014-11-29 19:17:56 +02:00
Sergey M․
e987e91fcc [playvid] Capture and output error message 2014-11-29 22:16:35 +06:00
Sergey M․
cb6444e197 [noco] Add support for multi language videos (Closes #4326) 2014-11-28 20:38:47 +06:00
15 changed files with 309 additions and 171 deletions

View File

@@ -88,3 +88,4 @@ Dao Hoang Son
Oskar Jauch
Matthew Rayfield
t0mm0
Tithen-Firion

View File

@@ -238,7 +238,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True

View File

@@ -242,7 +242,7 @@ from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
from .musicvault import MusicVaultIE
from .muzu import MuzuTVIE
from .myspace import MySpaceIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .naver import NaverIE

View File

@@ -1,9 +1,11 @@
from __future__ import unicode_literals
import re
import xml.etree.ElementTree
from .subtitles import SubtitlesInfoExtractor
from ..utils import ExtractorError
from ..compat import compat_HTTPError
class BBCCoUkIE(SubtitlesInfoExtractor):
@@ -55,7 +57,22 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
'skip_download': True,
},
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
}
},
{
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
'info_dict': {
'id': 'b03k3pb7',
'ext': 'flv',
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
'description': '2. Invasion',
'duration': 3600,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
},
]
def _extract_asx_playlist(self, connection, programme_id):
@@ -102,6 +119,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
def _extract_medias(self, media_selection):
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
if error is not None:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
def _extract_connections(self, media):
@@ -158,54 +179,73 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
subtitles[lang] = srt
return subtitles
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
group_id = mobj.group('id')
webpage = self._download_webpage(url, group_id, 'Downloading video page')
if re.search(r'id="emp-error" class="notinuk">', webpage):
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
expected=True)
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
'Downloading playlist XML')
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
if no_items is not None:
reason = no_items.get('reason')
if reason == 'preAvailability':
msg = 'Episode %s is not yet available' % group_id
elif reason == 'postAvailability':
msg = 'Episode %s is no longer available' % group_id
def _download_media_selector(self, programme_id):
try:
media_selection = self._download_xml(
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
programme_id, 'Downloading media selection XML')
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
else:
msg = 'Episode %s is not available: %s' % (group_id, reason)
raise ExtractorError(msg, expected=True)
raise
formats = []
subtitles = None
for item in self._extract_items(playlist):
kind = item.get('kind')
if kind != 'programme' and kind != 'radioProgramme':
continue
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
for media in self._extract_medias(media_selection):
kind = media.get('kind')
if kind == 'audio':
formats.extend(self._extract_audio(media, programme_id))
elif kind == 'video':
formats.extend(self._extract_video(media, programme_id))
elif kind == 'captions':
subtitles = self._extract_captions(media, programme_id)
programme_id = item.get('identifier')
duration = int(item.get('duration'))
return formats, subtitles
media_selection = self._download_xml(
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
programme_id, 'Downloading media selection XML')
def _real_extract(self, url):
group_id = self._match_id(url)
for media in self._extract_medias(media_selection):
kind = media.get('kind')
if kind == 'audio':
formats.extend(self._extract_audio(media, programme_id))
elif kind == 'video':
formats.extend(self._extract_video(media, programme_id))
elif kind == 'captions':
subtitles = self._extract_captions(media, programme_id)
webpage = self._download_webpage(url, group_id, 'Downloading video page')
programme_id = self._search_regex(
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
if programme_id:
player = self._download_json(
'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
group_id)['jsConf']['player']
title = player['title']
description = player['subtitle']
duration = player['duration']
formats, subtitles = self._download_media_selector(programme_id)
else:
playlist = self._download_xml(
'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
group_id, 'Downloading playlist XML')
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
if no_items is not None:
reason = no_items.get('reason')
if reason == 'preAvailability':
msg = 'Episode %s is not yet available' % group_id
elif reason == 'postAvailability':
msg = 'Episode %s is no longer available' % group_id
elif reason == 'noMedia':
msg = 'Episode %s is not currently available' % group_id
else:
msg = 'Episode %s is not available: %s' % (group_id, reason)
raise ExtractorError(msg, expected=True)
for item in self._extract_items(playlist):
kind = item.get('kind')
if kind != 'programme' and kind != 'radioProgramme':
continue
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
programme_id = item.get('identifier')
duration = int(item.get('duration'))
formats, subtitles = self._download_media_selector(programme_id)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(programme_id, subtitles)

View File

@@ -64,6 +64,20 @@ class BlipTVIE(SubtitlesInfoExtractor):
'uploader': 'redvsblue',
'uploader_id': '792887',
}
},
{
'url': 'http://blip.tv/play/gbk766dkj4Yn',
'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
'info_dict': {
'id': '1749452',
'ext': 'mp4',
'upload_date': '20090208',
'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
'title': 'Nostalgia Critic: Transformers',
'timestamp': 1234068723,
'uploader': 'NostalgiaCritic',
'uploader_id': '246467',
}
}
]
@@ -74,11 +88,13 @@ class BlipTVIE(SubtitlesInfoExtractor):
# See https://github.com/rg3/youtube-dl/issues/857 and
# https://github.com/rg3/youtube-dl/issues/4197
if lookup_id:
info_page = self._download_webpage(
'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
else:
video_id = mobj.group('id')
urlh = self._request_webpage(
'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
url = compat_urlparse.urlparse(urlh.geturl())
qs = compat_urlparse.parse_qs(url.query)
mobj = re.match(self._VALID_URL, qs['file'][0])
video_id = mobj.group('id')
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
@@ -114,7 +130,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
msg = self._download_webpage(
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
video_id, 'Resolving URL for %s' % role)
real_url = compat_urlparse.parse_qs(msg)['message'][0]
real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
media_type = media_content.get('type')
if media_type == 'text/srt' or url.endswith('.srt'):

View File

@@ -13,6 +13,7 @@ import time
import xml.etree.ElementTree
from ..compat import (
compat_cookiejar,
compat_http_client,
compat_urllib_error,
compat_urllib_parse_urlparse,
@@ -817,6 +818,11 @@ class InfoExtractor(object):
self._downloader.report_warning(msg)
return res
def _set_cookie(self, domain, name, value, expire_time=None):
cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
None, '/', True, False, expire_time, '', None, None, None)
self._downloader.cookiejar.set_cookie(cookie)
class SearchInfoExtractor(InfoExtractor):
"""

View File

@@ -1,3 +1,4 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
@@ -7,6 +8,7 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
)
from ..utils import ExtractorError
class MySpaceIE(InfoExtractor):
@@ -14,33 +16,58 @@ class MySpaceIE(InfoExtractor):
_TESTS = [
{
'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
'info_dict': {
'id': '100008689',
'id': '109594919',
'ext': 'flv',
'title': 'Viva La Vida',
'description': 'The official Viva La Vida video, directed by Hype Williams',
'uploader': 'Coldplay',
'uploader_id': 'coldplay',
'title': 'Little Big Town',
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
'uploader': 'Five Minutes to the Stage',
'uploader_id': 'fiveminutestothestage',
},
'params': {
# rtmp download
'skip_download': True,
},
},
# song
# songs
{
'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
'info_dict': {
'id': '39008454',
'id': '93388656',
'ext': 'flv',
'title': 'Darkness In My Heart',
'uploader_id': 'spiderbags',
'title': 'Of weakened soul...',
'uploader': 'Killsorrow',
'uploader_id': 'killsorrow',
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
'add_ie': ['Vevo'],
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
'info_dict': {
'id': 'USZM20600099',
'ext': 'mp4',
'title': 'Animal I Have Become',
'uploader': 'Three Days Grace',
'timestamp': int,
'upload_date': '20060502',
},
'skip': 'VEVO is only available in some countries',
}, {
'add_ie': ['Youtube'],
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
'info_dict': {
'id': 'ypWvQgnJrSU',
'ext': 'mp4',
'title': 'Starset - First Light',
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
'uploader': 'Jacob Soren',
'uploader_id': 'SorenPromotions',
'upload_date': '20140725',
}
},
]
@@ -48,16 +75,40 @@ class MySpaceIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
r'playerSwf":"([^"?]*)', webpage, 'player URL')
if mobj.group('mediatype').startswith('music/song'):
# songs don't store any useful info in the 'context' variable
song_data = self._search_regex(
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
webpage, 'song_data', default=None, group=0)
if song_data is None:
# some songs in an album are not playable
self.report_warning(
'%s: No downloadable song on this page' % video_id)
return
def search_data(name):
return self._search_regex(
r'data-%s="(.*?)"' % name, webpage, name)
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
song_data, name, default='', group='data')
streamUrl = search_data('stream-url')
if not streamUrl:
vevo_id = search_data('vevo-id')
youtube_id = search_data('youtube-id')
if vevo_id:
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
elif youtube_id:
self.to_screen('Youtube video detected: %s' % youtube_id)
return self.url_result(youtube_id, ie='Youtube')
else:
raise ExtractorError(
'Found song but don\'t know how to download it')
info = {
'id': video_id,
'title': self._og_search_title(webpage),
'uploader': search_data('artist-name'),
'uploader_id': search_data('artist-username'),
'thumbnail': self._og_search_thumbnail(webpage),
}
@@ -79,6 +130,50 @@ class MySpaceIE(InfoExtractor):
info.update({
'url': rtmp_url,
'play_path': play_path,
'player_url': player_url,
'ext': 'flv',
})
return info
class MySpaceAlbumIE(InfoExtractor):
IE_NAME = 'MySpace:album'
_VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
_TESTS = [{
'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
'info_dict': {
'title': 'Transmissions',
'id': '19455773',
},
'playlist_count': 14,
'skip': 'this album is only available in some countries',
}, {
'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
'info_dict': {
'title': 'The Demo',
'id': '18596029',
},
'playlist_count': 5,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
display_id = mobj.group('title') + playlist_id
webpage = self._download_webpage(url, display_id)
tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
if not tracks_paths:
raise ExtractorError(
'%s: No songs found, try using proxy' % display_id,
expected=True)
entries = [
self.url_result(t_path, ie=MySpaceIE.ie_key())
for t_path in tracks_paths]
return {
'_type': 'playlist',
'id': playlist_id,
'display_id': display_id,
'title': self._og_search_title(webpage),
'entries': entries,
}

View File

@@ -20,6 +20,7 @@ class NocoIE(InfoExtractor):
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
_LOGIN_URL = 'http://noco.tv/do.php'
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
_NETRC_MACHINE = 'noco'
_TEST = {
@@ -60,10 +61,12 @@ class NocoIE(InfoExtractor):
if 'erreur' in login:
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
def _call_api(self, path, video_id, note):
def _call_api(self, path, video_id, note, sub_lang=None):
ts = compat_str(int(time.time() * 1000))
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
url = self._API_URL_TEMPLATE % (path, ts, tk)
if sub_lang:
url += self._SUB_LANG_TEMPLATE % sub_lang
resp = self._download_json(url, video_id, note)
@@ -91,31 +94,34 @@ class NocoIE(InfoExtractor):
formats = []
for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
for lang, lang_dict in medias['fr']['video_list'].items():
for format_id, fmt in lang_dict['quality_list'].items():
format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
video = self._call_api(
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
video_id, 'Downloading %s video JSON' % format_id)
video = self._call_api(
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
video_id, 'Downloading %s video JSON' % format_id_extended,
lang if lang != 'none' else None)
file_url = video['file']
if not file_url:
continue
file_url = video['file']
if not file_url:
continue
if file_url in ['forbidden', 'not found']:
popmessage = video['popmessage']
self._raise_error(popmessage['title'], popmessage['message'])
if file_url in ['forbidden', 'not found']:
popmessage = video['popmessage']
self._raise_error(popmessage['title'], popmessage['message'])
formats.append({
'url': file_url,
'format_id': format_id,
'width': fmt['res_width'],
'height': fmt['res_lines'],
'abr': fmt['audiobitrate'],
'vbr': fmt['videobitrate'],
'filesize': fmt['filesize'],
'format_note': qualities[format_id]['quality_name'],
'preference': qualities[format_id]['priority'],
})
formats.append({
'url': file_url,
'format_id': format_id_extended,
'width': fmt['res_width'],
'height': fmt['res_lines'],
'abr': fmt['audiobitrate'],
'vbr': fmt['videobitrate'],
'filesize': fmt['filesize'],
'format_note': qualities[format_id]['quality_name'],
'preference': qualities[format_id]['priority'],
})
self._sort_formats(formats)

View File

@@ -4,6 +4,8 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
compat_urllib_parse,
)
@@ -28,6 +30,11 @@ class PlayvidIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
m_error = re.search(
r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
if m_error:
raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
video_title = None
duration = None
video_thumbnail = None

View File

@@ -39,7 +39,7 @@ class SlideshareIE(InfoExtractor):
ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex(
r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
'description', fatal=False)
return {

View File

@@ -19,6 +19,7 @@ class TuneInIE(InfoExtractor):
|tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
)
'''
_API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
_INFO_DICT = {
'id': '34682',
@@ -56,13 +57,10 @@ class TuneInIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
station_id = mobj.group('id')
webpage = self._download_webpage(
url, station_id, note='Downloading station webpage')
station_info = self._download_json(
self._API_URL_TEMPLATE.format(station_id),
station_id, note='Downloading station JSON')
payload = self._html_search_regex(
r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
json_data = json.loads(payload)
station_info = json_data['Station']['broadcast']
title = station_info['Title']
thumbnail = station_info.get('Logo')
location = station_info.get('Location')

View File

@@ -13,7 +13,7 @@ from ..utils import (
class VevoIE(InfoExtractor):
"""
Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE)
(currently used by MTVIE and MySpaceIE)
"""
_VALID_URL = r'''(?x)
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|

View File

@@ -50,7 +50,7 @@ class XMinusIE(InfoExtractor):
webpage, 'view count', fatal=False))
enc_token = self._html_search_regex(
r'data-mt="(.*?)"', webpage, 'enc_token')
r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
token = ''.join(
c if pos == 3 else compat_chr(compat_ord(c) - 1)
for pos, c in enumerate(reversed(enc_token)))

View File

@@ -7,6 +7,7 @@ import itertools
import json
import os.path
import re
import time
import traceback
from .common import InfoExtractor, SearchInfoExtractor
@@ -38,17 +39,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
def _set_language(self):
return bool(self._download_webpage(
self._LANG_URL, None,
note='Setting language', errnote='unable to set language',
fatal=False))
self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
# YouTube sets the expire time to about two months
expire_time=time.time() + 60*24*3600)
def _login(self):
"""
@@ -176,30 +174,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return False
return True
def _confirm_age(self):
age_form = {
'next_url': '/',
'action_confirm': 'Confirm',
}
req = compat_urllib_request.Request(
self._AGE_URL,
compat_urllib_parse.urlencode(age_form).encode('ascii')
)
self._download_webpage(
req, None,
note='Confirming age', errnote='Unable to confirm age',
fatal=False)
def _real_initialize(self):
if self._downloader is None:
return
if self._get_login_info()[0] is not None:
if not self._set_language():
return
self._set_language()
if not self._login():
return
self._confirm_age()
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -305,6 +285,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -398,8 +379,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'info_dict': {
'id': 'IB3lcPjvWLA',
'ext': 'm4a',
'title': 'Afrojack - The Spark ft. Spree Wilson',
'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO',
'upload_date': '20131011',
@@ -421,7 +402,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
}
},
# Normal age-gate video (No vevo, embed allowed)
{
'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
'info_dict': {
'id': 'HtVdAasjOgU',
'ext': 'mp4',
'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
'description': 'md5:eca57043abae25130f58f655ad9a7771',
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
'upload_date': '20140605',
},
},
]
def __init__(self, *args, **kwargs):
@@ -684,16 +678,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Get video webpage
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
pref_cookies = [
c for c in self._downloader.cookiejar
if c.domain == '.youtube.com' and c.name == 'PREF']
for pc in pref_cookies:
if 'hl=' in pc.value:
pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
else:
if pc.value:
pc.value += '&'
pc.value += 'hl=en'
video_webpage = self._download_webpage(url, video_id)
# Attempt to extract SWF player URL
@@ -704,7 +688,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
player_url = None
# Get video info
self.report_video_info_webpage_download(video_id)
if re.search(r'player-age-gate-content">', video_webpage) is not None:
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -723,15 +706,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_info = compat_parse_qs(video_info_webpage)
else:
age_gate = False
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
errnote='unable to download video info webpage')
video_info = compat_parse_qs(video_info_webpage)
if 'token' in video_info:
break
try:
# Try looking directly into the video webpage
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
if not mobj:
raise ValueError('Could not find ytplayer.config') # caught below
json_code = uppercase_escape(mobj.group(1))
ytplayer_config = json.loads(json_code)
args = ytplayer_config['args']
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
if 'url_encoded_fmt_stream_map' not in args:
raise ValueError('No stream_map present') # caught below
except ValueError:
# We fallback to the get_video_info pages (used by the embed page)
self.report_video_info_webpage_download(video_id)
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
video_info_webpage = self._download_webpage(video_info_url,
video_id, note=False,
errnote='unable to download video info webpage')
video_info = compat_parse_qs(video_info_webpage)
if 'token' in video_info:
break
if 'token' not in video_info:
if 'reason' in video_info:
raise ExtractorError(
@@ -856,32 +854,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if self._downloader.params.get('writeannotations', False):
video_annotations = self._extract_annotations(video_id)
# Decide which formats to download
try:
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
if not mobj:
raise ValueError('Could not find vevo ID')
json_code = uppercase_escape(mobj.group(1))
ytplayer_config = json.loads(json_code)
args = ytplayer_config['args']
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
# this signatures are encrypted
if 'url_encoded_fmt_stream_map' not in args:
raise ValueError('No stream_map present') # caught below
re_signature = re.compile(r'[&,]s=')
m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
if m_s is not None:
self.to_screen('%s: Encrypted signatures detected.' % video_id)
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
m_s = re_signature.search(args.get('adaptive_fmts', ''))
if m_s is not None:
if 'adaptive_fmts' in video_info:
video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
else:
video_info['adaptive_fmts'] = [args['adaptive_fmts']]
except ValueError:
pass
def _map_to_format_list(urlmap):
formats = []
for itag, video_real_url in urlmap.items():
@@ -974,10 +946,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
# Luckily, it seems, this case uses some kind of default signature (len == 86), so the
# combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
if age_gate:
dash_manifest_url = video_info.get('dashmpd')[0]
else:
dash_manifest_url = ytplayer_config['args']['dashmpd']
dash_manifest_url = video_info.get('dashmpd')[0]
def decrypt_sig(mobj):
s = mobj.group(1)

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2014.11.27'
__version__ = '2014.12.01'