1
0
mirror of https://source.netsyms.com/Mirrors/youtube-dl synced 2026-03-29 15:23:52 +00:00

Compare commits

...

67 Commits

Author SHA1 Message Date
Sergey M․
5e77c0b58e release 2016.12.22 2016-12-22 22:52:54 +07:00
Remita Amine
ab3091feda [ChangeLog] Actualize 2016-12-22 22:51:51 +07:00
Remita Amine
a07588369f [common] improve detection for video only formats and m3u8 manifest(fixes #11507) 2016-12-22 10:02:56 +01:00
Remita Amine
f5a723a78a [theplatform] pass geo verification headers to smil request(closes #10146) 2016-12-21 20:59:03 +01:00
Remita Amine
f120646f04 [viu] pass geo verification headers to auth request 2016-12-21 20:50:10 +01:00
Remita Amine
9c5b5f2115 [rtl2] extract more formats and metadata 2016-12-21 18:46:25 +01:00
Sergey M․
ae806db628 [vbox7] Skip malformed JSON-LD (closes #11501) 2016-12-21 22:39:05 +07:00
Remita Amine
bfa1073e11 [uplynk] force downloading using hls native downloader(closes #11496) 2016-12-20 19:49:45 +01:00
Remita Amine
e029c43bd4 [laola1] add support for another extraction scenario(closes #11460) 2016-12-20 18:22:57 +01:00
Sergey M․
90352a8041 release 2016.12.20 2016-12-20 22:39:39 +07:00
Sergey M․
1f6a79b0af [ChangeLog] Actualize 2016-12-20 22:37:06 +07:00
Sergey M․
3d6761ba92 [vbox7] Fix extraction (closes #11494) 2016-12-20 21:53:51 +07:00
Remita Amine
f59d1146c0 [uktvplay] Add new extractor(closes #11027) 2016-12-20 12:52:46 +01:00
Remita Amine
b1c357975d [piksel] Add new extractor(closes #11246) 2016-12-20 12:35:03 +01:00
Remita Amine
d8c507c9e2 [vimeo] fix extraction for hls formats and add support for dash formats(closes #11490) 2016-12-20 12:35:03 +01:00
Remita Amine
7fe1592073 [common] fix dash codec information for mixed videos and fragment url construction(#11490) 2016-12-20 12:35:03 +01:00
Yen Chi Hsuan
8ab7e6c4cc [kaltura] Improve widget ID extraction (closes #11480) 2016-12-20 18:45:52 +08:00
Sergey M․
c80db5d398 [nrktv:direkte] Add support for live streams (#11488) 2016-12-19 23:47:45 +07:00
Remita Amine
5aaf012a4e [pbs] fix extraction for geo restricted videos(#7095) 2016-12-19 16:27:12 +01:00
Remita Amine
954529c10f [brightcove:new] skip widevine classic videos 2016-12-18 21:39:59 +01:00
Remita Amine
ed7b333fbf [viu] extract supported hls manifest 2016-12-18 18:24:01 +01:00
Remita Amine
723103151e [viu] improve extraction(closes #10607)(closes #11329) 2016-12-18 17:20:53 +01:00
ping
e7b6caef24 [viu] New extractor for viu.com 2016-12-18 17:20:53 +01:00
Sergey M․
ec79b1de1c Revert "Credit @pyx for meipai (#10718)"
This reverts commit d5e623aaa1.
2016-12-18 20:56:21 +07:00
Sergey M․
f73d7d5074 release 2016.12.18 2016-12-18 19:50:33 +07:00
Sergey M․
52a1d48d9f [ChangeLog] Actualize 2016-12-18 19:48:59 +07:00
Sergey M․
d5e623aaa1 Credit @pyx for meipai (#10718) 2016-12-18 19:46:57 +07:00
Remita Amine
199a47abba [ccma] Add new extractor(closes #11359) 2016-12-18 10:49:10 +01:00
Remita Amine
b42a0bf360 [laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00
Remita Amine
6e416b210c [nbc] fix extraction for msnbc videos(fixes #11466) 2016-12-17 18:11:13 +01:00
Sergey M․
04bf59ff64 [extractors] Add missing twitch imports 2016-12-17 23:03:50 +07:00
Sergey M․
87a449c1ed [extractor/common] Recognize DASH formats in html5 media entries 2016-12-17 23:03:13 +07:00
Sergey M․
93753aad20 [twitch] Adapt to new videos pages schema (closes #11469) 2016-12-17 20:20:23 +07:00
Sergey M․
2786818c33 [meipai] Fix regular videos extraction and improve (closes #10718) 2016-12-17 19:42:34 +07:00
Philip Xu
9b785768ac [meipai] Add extractor 2016-12-17 19:41:35 +07:00
Sergey M․
47c914f995 [ondemandkorea] Fix extraction (closes #10772) 2016-12-17 18:50:12 +07:00
Sergey M․
732d116aa7 [jwplatform] Improve duration extraction 2016-12-17 18:50:07 +07:00
Sergey M․
a495840d3b [jwplatform] Improve subtitles extraction 2016-12-17 18:50:00 +07:00
Sergey M․
b0c65c677f [utils] Improve urljoin 2016-12-17 18:49:55 +07:00
ping
594601f545 [ondemandkorea] Add extractor 2016-12-17 18:49:45 +07:00
Sergey M․
0ae9560eea [vporn] Use urljoin for thumbnail 2016-12-16 23:57:51 +07:00
Remita Amine
dc1f3a9f20 [vvvvid] do not cache the conn_id 2016-12-16 11:05:46 +01:00
Remita Amine
7b1e80792b [vvvvid] Add new extractor(closes #5915) 2016-12-16 09:05:34 +01:00
Sergey M․
38be3bc568 release 2016.12.15 2016-12-15 21:16:55 +07:00
Sergey M․
d7ef47bffd [ChangeLog] Actualize 2016-12-15 21:15:45 +07:00
Yen Chi Hsuan
5c32a5be95 [openload] Recognize oload.tv URLs (#10408) 2016-12-15 17:51:26 +08:00
Yen Chi Hsuan
30918999f5 [facebook] Recognize .onion URLs (closes #11443) 2016-12-15 01:04:49 +08:00
Sergey M․
069f918302 [vlive] Use live titles for live streams 2016-12-14 21:30:33 +07:00
Sergey M․
89c63cc5f8 [vlive] Add video params extraction fallback and improve (closes #11375) 2016-12-14 21:05:50 +07:00
Corey Nicholson
577748075b [vlive] Update extraction 2016-12-14 21:05:32 +07:00
Remita Amine
67dcbc0add [canvas] extract dash formats 2016-12-13 17:59:22 +01:00
Sergey M․
3a40f859b5 [melonvod] Improve (closes #11419) 2016-12-13 02:27:26 +07:00
Sergey M․
e34c33614d [utils] Add convenience urljoin 2016-12-13 02:23:49 +07:00
ping
abf3494ac7 [melonvod] Add extractor for vod.melon.com 2016-12-13 02:13:40 +07:00
Sergey M․
3c1e9dc4ec release 2016.12.12 2016-12-12 01:44:50 +07:00
Sergey M․
62faf9b55e [ChangeLog] Actualize 2016-12-12 01:41:08 +07:00
Sergey M․
3530e0d3d9 [dplay] Use Safari user-agent for hls (closes #11418) 2016-12-12 00:58:08 +07:00
Sergey M․
fb37eb25d9 [utils] Add common user agents map 2016-12-12 00:49:07 +07:00
Sergey M․
d2d2495e16 [facebook] Detect login required error message 2016-12-11 01:40:30 +07:00
Sergey M․
19b4900b7b [facebook] Improve video selection (closes #11390) 2016-12-11 01:22:01 +07:00
Sergey M․
6ca478d44a [canalplus] Add another video id regex (closes #11399) 2016-12-11 00:45:27 +07:00
Sergey M․
655cb545ab [mixcloud] Relax _VALID_URL (closes #11406) 2016-12-10 23:48:18 +07:00
Remita Amine
f0b69fa91a [ctvnews] relax _VALID_URL regex(closes #11394) 2016-12-10 17:36:32 +01:00
Remita Amine
8821a718cf [common] recognize hls manifests that contain video only formats(#11394) 2016-12-10 17:22:15 +01:00
Remita Amine
0d7d9f9404 [rte] improve extraction(closes #10498)(closes #7746) 2016-12-10 16:34:01 +01:00
Remita Amine
f41db40596 [prosiebensat1] extract dash formats 2016-12-10 13:29:51 +01:00
Remita Amine
68601ef3ac [rts,srgssr] improve extraction for geo restricted videos(fixes #11089)(closes #4989) 2016-12-10 10:47:56 +01:00
46 changed files with 1709 additions and 421 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.09**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.22**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.12.09
[debug] youtube-dl version 2016.12.22
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -1,3 +1,87 @@
version 2016.12.22
Core
* [extractor/common] Improve detection of video-only formats in m3u8
manifests (#11507)
Extractors
+ [theplatform] Pass geo verification headers to SMIL request (#10146)
+ [viu] Pass geo verification headers to auth request
* [rtl2] Extract more formats and metadata
* [vbox7] Skip malformed JSON-LD (#11501)
* [uplynk] Force downloading using native HLS downloader (#11496)
+ [laola1] Add support for another extraction scenario (#11460)
version 2016.12.20
Core
* [extractor/common] Improve fragment URL construction for DASH media
* [extractor/common] Fix codec information extraction for mixed audio/video
DASH media (#11490)
Extractors
* [vbox7] Fix extraction (#11494)
+ [uktvplay] Add support for uktvplay.uktv.co.uk (#11027)
+ [piksel] Add support for player.piksel.com (#11246)
+ [vimeo] Add support for DASH formats
* [vimeo] Fix extraction for HLS formats (#11490)
* [kaltura] Fix wrong widget ID in some cases (#11480)
+ [nrktv:direkte] Add support for live streams (#11488)
* [pbs] Fix extraction for geo restricted videos (#7095)
* [brightcove:new] Skip widevine classic videos
+ [viu] Add support for viu.com (#10607, #11329)
version 2016.12.18
Core
+ [extractor/common] Recognize DASH formats in html5 media entries
Extractors
+ [ccma] Add support for ccma.cat (#11359)
* [laola1tv] Improve extraction
+ [laola1tv] Add support for embed URLs (#11460)
* [nbc] Fix extraction for MSNBC videos (#11466)
* [twitch] Adapt to new videos pages URL schema (#11469)
+ [meipai] Add support for meipai.com (#10718)
* [jwplatform] Improve subtitles and duration extraction
+ [ondemandkorea] Add support for ondemandkorea.com (#10772)
+ [vvvvid] Add support for vvvvid.it (#5915)
version 2016.12.15
Core
+ [utils] Add convenience urljoin
Extractors
+ [openload] Recognize oload.tv URLs (#10408)
+ [facebook] Recognize .onion URLs (#11443)
* [vlive] Fix extraction (#11375, #11383)
+ [canvas] Extract DASH formats
+ [melonvod] Add support for vod.melon.com (#11419)
version 2016.12.12
Core
+ [utils] Add common user agents map
+ [common] Recognize HLS manifests that contain video only formats (#11394)
Extractors
+ [dplay] Use Safari user agent for HLS (#11418)
+ [facebook] Detect login required error message
* [facebook] Improve video selection (#11390)
+ [canalplus] Add another video id pattern (#11399)
* [mixcloud] Relax URL regular expression (#11406)
* [ctvnews] Relax URL regular expression (#11394)
+ [rte] Capture and output error message (#7746, #10498)
+ [prosiebensat1] Add support for DASH formats
* [srgssr] Improve extraction for geo restricted videos (#11089)
* [rts] Improve extraction for geo restricted videos (#4989)
version 2016.12.09
Core
@@ -15,6 +99,7 @@ Extractors
+ [aenetworks] Extract more formats (#11321)
+ [thisoldhouse] Recognize /tv-episode/ URLs (#11271)
version 2016.12.01
Extractors

View File

@@ -131,6 +131,7 @@
- **cbsnews**: CBS News
- **cbsnews:livevideo**: CBS News Live Videos
- **CBSSports**
- **CCMA**
- **CCTV**
- **CDA**
- **CeskaTelevize**
@@ -364,7 +365,8 @@
- **kuwo:singer**: 酷我音乐 - 歌手
- **kuwo:song**: 酷我音乐
- **la7.it**
- **Laola1Tv**
- **laola1tv**
- **laola1tv:embed**
- **LCI**
- **Lcp**
- **LcpPlay**
@@ -402,6 +404,8 @@
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **Meipai**: 美拍
- **MelonVOD**
- **META**
- **metacafe**
- **Metacritic**
@@ -513,6 +517,7 @@
- **NRKPlaylist**
- **NRKSkole**: NRK Skole
- **NRKTV**: NRK TV and NRK Radio
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
- **ntv.ru**
- **Nuvid**
- **NYTimes**
@@ -523,6 +528,7 @@
- **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**
- **OnDemandKorea**
- **onet.tv**
- **onet.tv:channel**
- **OnionStudios**
@@ -546,6 +552,7 @@
- **PhilharmonieDeParis**: Philharmonie de Paris
- **phoenix.de**
- **Photobucket**
- **Piksel**
- **Pinkbike**
- **Pladform**
- **play.fm**
@@ -784,10 +791,13 @@
- **Tweakers**
- **twitch:chapter**
- **twitch:clips**
- **twitch:past_broadcasts**
- **twitch:profile**
- **twitch:stream**
- **twitch:video**
- **twitch:videos:all**
- **twitch:videos:highlights**
- **twitch:videos:past-broadcasts**
- **twitch:videos:uploads**
- **twitch:vod**
- **twitter**
- **twitter:amplify**
@@ -795,6 +805,7 @@
- **udemy**
- **udemy:course**
- **UDNEmbed**: 聯合影音
- **UKTVPlay**
- **Unistra**
- **uol.com.br**
- **uplynk**
@@ -859,6 +870,9 @@
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- **Viu**
- **viu:ott**
- **viu:playlist**
- **Vivo**: vivo.sx
- **vk**: VK
- **vk:uservideos**: VK - User's Videos
@@ -873,6 +887,7 @@
- **VRT**
- **vube**: Vube.com
- **VuClip**
- **VVVVID**
- **VyboryMos**
- **Vzaar**
- **Walla**

View File

@@ -70,6 +70,7 @@ from youtube_dl.utils import (
lowercase_escape,
url_basename,
base_url,
urljoin,
urlencode_postdata,
urshift,
update_url_query,
@@ -445,6 +446,23 @@ class TestUtil(unittest.TestCase):
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
def test_urljoin(self):
    # Table of (base, path, expected) triples; each row asserts that
    # urljoin(base, path) == expected. Rows are checked in order and the
    # first mismatch fails the test.
    cases = (
        ('http://foo.de/', '/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('//foo.de/', '/a/b/c.txt', '//foo.de/a/b/c.txt'),
        ('http://foo.de/', 'a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('http://foo.de', '/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('http://foo.de', 'a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('http://foo.de/', 'http://foo.de/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('http://foo.de/', '//foo.de/a/b/c.txt', '//foo.de/a/b/c.txt'),
        (None, 'http://foo.de/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        (None, '//foo.de/a/b/c.txt', '//foo.de/a/b/c.txt'),
        ('', 'http://foo.de/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        (['foobar'], 'http://foo.de/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('http://foo.de/', None, None),
        ('http://foo.de/', '', None),
        ('http://foo.de/', ['foobar'], None),
        ('http://foo.de/a/b/c.txt', '.././../d.txt', 'http://foo.de/d.txt'),
    )
    for base, path, expected in cases:
        self.assertEqual(urljoin(base, path), expected)
def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_age_limit(False), None)

View File

@@ -65,6 +65,9 @@ class HlsFD(FragmentFD):
s = manifest.decode('utf-8', 'ignore')
if not self.can_download(s, info_dict):
if info_dict.get('extra_param_to_segment_url'):
self.report_error('pycrypto not found. Please install it.')
return False
self.report_warning(
'hlsnative has detected features it does not support, '
'extraction will be delegated to ffmpeg')

View File

@@ -548,7 +548,7 @@ class BrightcoveNewIE(InfoExtractor):
container = source.get('container')
ext = mimetype2ext(source.get('type'))
src = source.get('src')
if ext == 'ism':
if ext == 'ism' or container == 'WVM':
continue
elif ext == 'm3u8' or container == 'M2TS':
if not src:

View File

@@ -105,7 +105,8 @@ class CanalplusIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
r'id=["\']canal_video_player(?P<id>\d+)'],
r'id=["\']canal_video_player(?P<id>\d+)',
r'data-video=["\'](?P<id>\d+)'],
webpage, 'video id', group='id')
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)

View File

@@ -89,6 +89,9 @@ class CanvasIE(InfoExtractor):
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, display_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
formats.extend(self._extract_mpd_formats(
format_url, display_id, mpd_id=format_type, fatal=False))
else:
formats.append({
'format_id': format_type,

View File

@@ -283,11 +283,6 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
if len(formats) < 2:
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
# Despite metadata in m3u8 all video+audio formats are
# actually video-only (no audio)
for f in formats:
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
f['acodec'] = 'none'
self._sort_formats(formats)
info = {

View File

@@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
clean_html,
)
class CCMAIE(InfoExtractor):
    # Extractor for ccma.cat media pages. The URL carries both the media
    # type ("video" or "audio") and the numeric media id.
    _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
        'md5': '7296ca43977c8ea4469e719c609b0871',
        'info_dict': {
            'id': '5630208',
            'ext': 'mp4',
            'title': 'L\'espot de La Marató de TV3',
            'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
            'timestamp': 1470918540,
            'upload_date': '20160811',
        }
    }, {
        'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
        'md5': 'fa3e38f269329a278271276330261425',
        'info_dict': {
            'id': '943685',
            'ext': 'mp3',
            'title': 'El Consell de Savis analitza el derbi',
            'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
            'upload_date': '20171205',
            'timestamp': 1512507300,
        }
    }]

    def _real_extract(self, url):
        # Returns the info dict for the media referenced by `url`.
        # Raises KeyError if no profile request yielded metadata containing
        # 'informacio' / 'titol' (both are accessed as mandatory).
        media_type, media_id = re.match(self._VALID_URL, url).groups()

        media_data = {}
        formats = []
        # Audio is requested with the 'pc' profile only; video is requested
        # with 'mobil' first and 'pc' second.
        profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc']
        for i, profile in enumerate(profiles):
            # Non-fatal: a failed profile request must not abort extraction.
            md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
                'media': media_type,
                'idint': media_id,
                'profile': profile,
            }, fatal=False)
            if md:
                # Keep the most recent successful response as the metadata
                # source for title, duration, subtitles and thumbnails.
                media_data = md
                media_url = media_data.get('media', {}).get('url')
                if media_url:
                    formats.append({
                        'format_id': profile,
                        'url': media_url,
                        # Position in `profiles` doubles as the quality rank.
                        'quality': i,
                    })
        self._sort_formats(formats)

        informacio = media_data['informacio']
        title = informacio['titol']
        durada = informacio.get('durada', {})
        # Prefer the numeric 'milisegons' field (1000 is passed as the second
        # argument of int_or_none - presumably a scale to seconds, confirm
        # against utils.int_or_none); otherwise parse the textual duration.
        duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
        timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))

        subtitles = {}
        subtitols = media_data.get('subtitols', {})
        if subtitols:
            sub_url = subtitols.get('url')
            if sub_url:
                # Language key falls back from 'iso' to 'text' to 'ca'.
                subtitles.setdefault(
                    subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
                        'url': sub_url,
                    })

        thumbnails = []
        imatges = media_data.get('imatges', {})
        if imatges:
            thumbnail_url = imatges.get('url')
            if thumbnail_url:
                thumbnails = [{
                    'url': thumbnail_url,
                    'width': int_or_none(imatges.get('amplada')),
                    'height': int_or_none(imatges.get('alcada')),
                }]

        return {
            'id': media_id,
            'title': title,
            'description': clean_html(informacio.get('descripcio')),
            'duration': duration,
            'timestamp': timestamp,
            # Fixed key: this was misspelled 'thumnails', so the thumbnail
            # list was never exposed under the standard 'thumbnails' field.
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
        }

View File

@@ -59,6 +59,7 @@ from ..utils import (
parse_m3u8_attributes,
extract_attributes,
parse_codecs,
urljoin,
)
@@ -1224,6 +1225,7 @@ class InfoExtractor(object):
'protocol': entry_protocol,
'preference': preference,
}]
audio_in_video_stream = {}
last_info = {}
last_media = {}
for line in m3u8_doc.splitlines():
@@ -1233,25 +1235,32 @@ class InfoExtractor(object):
media = parse_m3u8_attributes(line)
media_type = media.get('TYPE')
if media_type in ('VIDEO', 'AUDIO'):
group_id = media.get('GROUP-ID')
media_url = media.get('URI')
if media_url:
format_id = []
for v in (media.get('GROUP-ID'), media.get('NAME')):
for v in (group_id, media.get('NAME')):
if v:
format_id.append(v)
formats.append({
f = {
'format_id': '-'.join(format_id),
'url': format_url(media_url),
'language': media.get('LANGUAGE'),
'vcodec': 'none' if media_type == 'AUDIO' else None,
'ext': ext,
'protocol': entry_protocol,
'preference': preference,
})
}
if media_type == 'AUDIO':
f['vcodec'] = 'none'
if group_id and not audio_in_video_stream.get(group_id):
audio_in_video_stream[group_id] = False
formats.append(f)
else:
# When there is no URI in EXT-X-MEDIA let this tag's
# data be used by regular URI lines below
last_media = media
if media_type == 'AUDIO' and group_id:
audio_in_video_stream[group_id] = True
elif line.startswith('#') or not line.strip():
continue
else:
@@ -1295,6 +1304,9 @@ class InfoExtractor(object):
'abr': abr,
})
f.update(parse_codecs(last_info.get('CODECS')))
if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
# TODO: update acodec for audio only formats with the same GROUP-ID
f['acodec'] = 'none'
formats.append(f)
last_info = {}
last_media = {}
@@ -1624,11 +1636,6 @@ class InfoExtractor(object):
extract_Initialization(segment_template)
return ms_info
def combine_url(base_url, target_url):
if re.match(r'^https?://', target_url):
return target_url
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = []
for period in mpd_doc.findall(_add_ns('Period')):
@@ -1678,12 +1685,11 @@ class InfoExtractor(object):
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
'fps': int_or_none(representation_attrib.get('frameRate')),
'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
'format_note': 'DASH %s' % content_type,
'filesize': filesize,
}
f.update(parse_codecs(representation_attrib.get('codecs')))
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
@@ -1767,7 +1773,7 @@ class InfoExtractor(object):
f['fragments'].append({'url': initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
for fragment in f['fragments']:
fragment['url'] = combine_url(base_url, fragment['url'])
fragment['url'] = urljoin(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats
@@ -1881,7 +1887,7 @@ class InfoExtractor(object):
})
return formats
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url)
@@ -1898,11 +1904,16 @@ class InfoExtractor(object):
def _media_formats(src, cur_media_type):
full_url = absolute_url(src)
if determine_ext(full_url) == 'm3u8':
ext = determine_ext(full_url)
if ext == 'm3u8':
is_plain_url = False
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
full_url, video_id, mpd_id=mpd_id)
else:
is_plain_url = True
formats = [{

View File

@@ -8,7 +8,7 @@ from ..utils import orderedSet
class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
@@ -40,6 +40,9 @@ class CTVNewsIE(InfoExtractor):
}, {
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
'only_matching': True,
}, {
'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -8,6 +8,7 @@ import time
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
USER_AGENTS,
int_or_none,
update_url_query,
)
@@ -102,10 +103,16 @@ class DPlayIE(InfoExtractor):
manifest_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
# ourselves
# ourselves. Also fragments' URLs are only served signed for
# Safari user agent.
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
for m3u8_format in m3u8_formats:
m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
m3u8_format.update({
'url': update_url_query(m3u8_format['url'], query),
'http_headers': {
'User-Agent': USER_AGENTS['Safari'],
},
})
formats.extend(m3u8_formats)
elif protocol == 'hds':
formats.extend(self._extract_f4m_formats(

View File

@@ -150,6 +150,7 @@ from .cbsnews import (
)
from .cbssports import CBSSportsIE
from .ccc import CCCIE
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import CeskaTelevizeIE
@@ -446,7 +447,10 @@ from .kuwo import (
KuwoMvIE,
)
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
from .laola1tv import (
Laola1TvEmbedIE,
Laola1TvIE,
)
from .lci import LCIIE
from .lcp import (
LcpPlayIE,
@@ -498,6 +502,8 @@ from .mangomolo import (
)
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .meta import METAIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
@@ -649,6 +655,7 @@ from .nrk import (
NRKPlaylistIE,
NRKSkoleIE,
NRKTVIE,
NRKTVDirekteIE,
)
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
@@ -661,6 +668,7 @@ from .nzz import NZZIE
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .ondemandkorea import OnDemandKoreaIE
from .onet import (
OnetIE,
OnetChannelIE,
@@ -691,6 +699,7 @@ from .periscope import (
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .piksel import PikselIE
from .pinkbike import PinkbikeIE
from .pladform import PladformIE
from .playfm import PlayFMIE
@@ -998,7 +1007,10 @@ from .twitch import (
TwitchChapterIE,
TwitchVodIE,
TwitchProfileIE,
TwitchAllVideosIE,
TwitchUploadsIE,
TwitchPastBroadcastsIE,
TwitchHighlightsIE,
TwitchStreamIE,
TwitchClipsIE,
)
@@ -1012,6 +1024,7 @@ from .udemy import (
UdemyCourseIE
)
from .udn import UDNEmbedIE
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .uol import UOLIE
@@ -1095,6 +1108,11 @@ from .viki import (
VikiIE,
VikiChannelIE,
)
from .viu import (
ViuIE,
ViuPlaylistIE,
ViuOTTIE,
)
from .vk import (
VKIE,
VKUserVideosIE,
@@ -1109,6 +1127,7 @@ from .vporn import VpornIE
from .vrt import VRTIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vvvvid import VVVVIDIE
from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
from .walla import WallaIE

View File

@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
https?://
(?:[\w-]+\.)?facebook\.com/
(?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
(?:[^#]*?\#!/)?
(?:
(?:
@@ -150,6 +150,9 @@ class FacebookIE(InfoExtractor):
}, {
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
'only_matching': True,
}, {
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
'only_matching': True,
}]
@staticmethod
@@ -244,8 +247,10 @@ class FacebookIE(InfoExtractor):
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
for item in server_js_data.get('instances', []):
if item[1][0] == 'VideoConfig':
video_data = item[2][0]['videoData']
break
video_item = item[2][0]
if video_item.get('video_id') == video_id:
video_data = video_item['videoData']
break
if not video_data:
if not fatal_if_no_video:
@@ -255,6 +260,8 @@ class FacebookIE(InfoExtractor):
raise ExtractorError(
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
expected=True)
elif '>You must log in to continue' in webpage:
self.raise_login_required()
else:
raise ExtractorError('Cannot parse data')

View File

@@ -75,6 +75,7 @@ from .facebook import FacebookIE
from .soundcloud import SoundcloudIE
from .vbox7 import Vbox7IE
from .dbtv import DBTVIE
from .piksel import PikselIE
class GenericIE(InfoExtractor):
@@ -972,6 +973,20 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
{
# Kaltura embedded, some fileExt broken (#11480)
'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
'info_dict': {
'id': '1_sgtvehim',
'ext': 'mp4',
'title': 'Our "Standard Models" of particle physics and cosmology',
'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
'timestamp': 1321158993,
'upload_date': '20111113',
'uploader_id': 'kps1',
},
'add_ie': ['Kaltura'],
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -2211,6 +2226,11 @@ class GenericIE(InfoExtractor):
if arkena_url:
return self.url_result(arkena_url, ArkenaIE.ie_key())
# Look for Piksel embeds
piksel_url = PikselIE._extract_url(webpage)
if piksel_url:
return self.url_result(piksel_url, PikselIE.ie_key())
# Look for Limelight embeds
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj:

View File

@@ -11,6 +11,7 @@ from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
urljoin,
)
@@ -110,10 +111,14 @@ class JWPlatformBaseIE(InfoExtractor):
tracks = video_data.get('tracks')
if tracks and isinstance(tracks, list):
for track in tracks:
if track.get('file') and track.get('kind') == 'captions':
subtitles.setdefault(track.get('label') or 'en', []).append({
'url': self._proto_relative_url(track['file'])
})
if track.get('kind') != 'captions':
continue
track_url = urljoin(base_url, track.get('file'))
if not track_url:
continue
subtitles.setdefault(track.get('label') or 'en', []).append({
'url': self._proto_relative_url(track_url)
})
entries.append({
'id': this_video_id,
@@ -121,7 +126,7 @@ class JWPlatformBaseIE(InfoExtractor):
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
'formats': formats,
})

View File

@@ -107,7 +107,7 @@ class KalturaIE(InfoExtractor):
(?P<q1>['\"])wid(?P=q1)\s*:\s*
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4),
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage) or
re.search(
r'''(?xs)
@@ -266,6 +266,9 @@ class KalturaIE(InfoExtractor):
# skip for now.
if f.get('fileExt') == 'chun':
continue
if not f.get('fileExt') and f.get('containerFormat') == 'qt':
# QT indicates QuickTime; some videos have broken fileExt
f['fileExt'] = 'mov'
video_url = sign_url(
'%s/flavorId/%s' % (data_url, f['id']))
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g

View File

@@ -1,25 +1,115 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
ExtractorError,
sanitized_Request,
unified_strdate,
urlencode_postdata,
xpath_element,
xpath_text,
urljoin,
update_url_query,
)
class Laola1TvEmbedIE(InfoExtractor):
    """Extract a laola1.tv video from its ``titanplayer.php`` embed page.

    Player parameters are read from the page's ``flashvars``. They are used
    to query ``hd_video.php`` for metadata, after which a token document is
    downloaded whose ``url``/``auth`` attributes locate the actual stream.
    """
    IE_NAME = 'laola1tv:embed'
    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
    _TEST = {
        # flashvars.premium = "false";
        'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
        'info_dict': {
            'id': '708065',
            'ext': 'mp4',
            'title': 'MA Long CHN - FAN Zhendong CHN',
            'uploader': 'ITTF - International Table Tennis Federation',
            'upload_date': '20161211',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the embed page at ``url``.

        Raises ExtractorError when the token document is missing its
        ``token`` element or reports a non-zero status.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        flash_vars = self._search_regex(
            r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')

        def get_flashvar(x, *args, **kwargs):
            # Look inside the ``flashvars = {...}`` object literal first,
            # then fall back to assignments found anywhere in the page.
            # Extra arguments (e.g. ``default``) only affect the fallback.
            flash_var = self._search_regex(
                r'%s\s*:\s*"([^"]+)"' % x,
                flash_vars, x, default=None)
            if not flash_var:
                flash_var = self._search_regex([
                    r'flashvars\.%s\s*=\s*"([^"]+)"' % x,
                    r'%s\s*=\s*"([^"]+)"' % x],
                    webpage, x, *args, **kwargs)
            return flash_var

        hd_doc = self._download_xml(
            'http://www.laola1.tv/server/hd_video.php', video_id, query={
                'play': get_flashvar('streamid'),
                'partner': get_flashvar('partnerid'),
                'portal': get_flashvar('portalid'),
                'lang': get_flashvar('sprache'),
                'v5ident': '',
            })

        # Shorthand for reading the text of a child of the <video> element.
        _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
        title = _v('title', fatal=True)

        token_url = None
        premium = get_flashvar('premium', default=None)
        if premium:
            # Any non-empty ``premium`` flashvar selects this branch (the
            # test page carries the string "false"): the token URL comes
            # straight from the metadata document.
            token_url = update_url_query(
                _v('url', fatal=True), {
                    'timestamp': get_flashvar('timestamp'),
                    'auth': get_flashvar('auth'),
                })
        else:
            # ``req_liga_abos`` may be absent; fall back to an empty string
            # instead of calling ``.split`` on None.
            data_abo = urlencode_postdata(
                dict((i, v) for i, v in enumerate(
                    (_v('req_liga_abos') or '').split(','))))
            token_url = self._download_json(
                'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access',
                video_id, query={
                    'videoId': _v('id'),
                    'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
                    'label': _v('label'),
                    'area': _v('area'),
                }, data=data_abo)['data']['stream-access'][0]

        token_doc = self._download_xml(
            token_url, video_id, 'Downloading token',
            headers=self.geo_verification_headers())

        # fatal=True turns a missing <token> element into an ExtractorError
        # rather than an AttributeError on None.
        token_attrib = xpath_element(
            token_doc, './/token', 'token', fatal=True).attrib

        if token_attrib['status'] != '0':
            raise ExtractorError(
                'Token error: %s' % token_attrib['comment'], expected=True)

        formats = self._extract_akamai_formats(
            '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
            video_id)
        self._sort_formats(formats)

        categories_str = _v('meta_sports')
        categories = categories_str.split(',') if categories_str else []
        is_live = _v('islive') == 'true'

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'upload_date': unified_strdate(_v('time_date')),
            'uploader': _v('meta_organisation'),
            'categories': categories,
            'is_live': is_live,
            'formats': formats,
        }
class Laola1TvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'
IE_NAME = 'laola1tv'
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': {
@@ -67,85 +157,20 @@ class Laola1TvIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('slug')
kind = mobj.group('kind')
lang = mobj.group('lang')
portal = mobj.group('portal')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
if 'Dieser Livestream ist bereits beendet.' in webpage:
raise ExtractorError('This live stream has already finished.', expected=True)
iframe_url = self._search_regex(
iframe_url = urljoin(url, self._search_regex(
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
webpage, 'iframe url')
video_id = self._search_regex(
r'videoid=(\d+)', iframe_url, 'video id')
iframe = self._download_webpage(compat_urlparse.urljoin(
url, iframe_url), display_id, 'Downloading iframe')
partner_id = self._search_regex(
r'partnerid\s*:\s*(["\'])(?P<partner_id>.+?)\1',
iframe, 'partner id', group='partner_id')
hd_doc = self._download_xml(
'http://www.laola1.tv/server/hd_video.php?%s'
% compat_urllib_parse_urlencode({
'play': video_id,
'partner': partner_id,
'portal': portal,
'lang': lang,
'v5ident': '',
}), display_id)
_v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
title = _v('title', fatal=True)
VS_TARGETS = {
'video': '2',
'livestream': '17',
}
req = sanitized_Request(
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
compat_urllib_parse_urlencode({
'videoId': video_id,
'target': VS_TARGETS.get(kind, '2'),
'label': _v('label'),
'area': _v('area'),
}),
urlencode_postdata(
dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))))
token_url = self._download_json(req, display_id)['data']['stream-access'][0]
token_doc = self._download_xml(token_url, display_id, 'Downloading token')
token_attrib = xpath_element(token_doc, './/token').attrib
token_auth = token_attrib['auth']
if token_auth in ('blocked', 'restricted', 'error'):
raise ExtractorError(
'Token error: %s' % token_attrib['comment'], expected=True)
formats = self._extract_f4m_formats(
'%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
video_id, f4m_id='hds')
self._sort_formats(formats)
categories_str = _v('meta_sports')
categories = categories_str.split(',') if categories_str else []
webpage, 'iframe url'))
return {
'id': video_id,
'_type': 'url',
'display_id': display_id,
'title': title,
'upload_date': unified_strdate(_v('time_date')),
'uploader': _v('meta_organisation'),
'categories': categories,
'is_live': _v('islive') == 'true',
'formats': formats,
'url': iframe_url,
'ie_key': 'Laola1TvEmbed',
}

View File

@@ -0,0 +1,104 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
unified_timestamp,
)
class MeipaiIE(InfoExtractor):
    """Extractor for meipai.com media pages.

    Handles both regular uploads (a direct URL in a ``data-video``
    attribute) and recordings of live streams (an m3u8 URL passed to
    ``encodeURIComponent`` in the page script).
    """
    IE_DESC = '美拍'
    # The dot in the host name is escaped so it only matches a literal '.'.
    _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
    _TESTS = [{
        # regular uploaded video
        'url': 'http://www.meipai.com/media/531697625',
        'md5': 'e3e9600f9e55a302daecc90825854b4f',
        'info_dict': {
            'id': '531697625',
            'ext': 'mp4',
            'title': '#葉子##阿桑##余姿昀##超級女聲#',
            'description': '#葉子##阿桑##余姿昀##超級女聲#',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 152,
            'timestamp': 1465492420,
            'upload_date': '20160609',
            'view_count': 35511,
            'creator': '她她-TATA',
            'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'],
        }
    }, {
        # record of live streaming
        'url': 'http://www.meipai.com/media/585526361',
        'md5': 'ff7d6afdbc6143342408223d4f5fb99a',
        'info_dict': {
            'id': '585526361',
            'ext': 'mp4',
            'title': '姿昀和善願 練歌練琴啦😁😁😁',
            'description': '姿昀和善願 練歌練琴啦😁😁😁',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 5975,
            'timestamp': 1474311799,
            'upload_date': '20160919',
            'view_count': 1215,
            'creator': '她她-TATA',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the media page at ``url``."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Prefer the Open Graph title; fall back to the <title> element.
        title = self._og_search_title(
            webpage, default=None) or self._html_search_regex(
            r'<title[^>]*>([^<]+)</title>', webpage, 'title')

        formats = []

        # recorded playback of live streaming
        m3u8_url = self._html_search_regex(
            r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)',
            webpage, 'm3u8 url', group='url', default=None)
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        if not formats:
            # regular uploaded video
            video_url = self._search_regex(
                r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url',
                group='url', default=None)
            if video_url:
                formats.append({
                    'url': video_url,
                    'format_id': 'http',
                })

        timestamp = unified_timestamp(self._og_search_property(
            'video:release_date', webpage, 'release date', fatal=False))

        # Drop empty pieces so that a page without tags yields [] and not
        # [''] (the result of splitting the empty default).
        tags = [
            tag for tag in self._og_search_property(
                'video:tag', webpage, 'tags', default='').split(',')
            if tag]

        view_count = int_or_none(self._html_search_meta(
            'interactionCount', webpage, 'view count'))
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration'))
        creator = self._og_search_property(
            'video:director', webpage, 'creator', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': duration,
            'timestamp': timestamp,
            'view_count': view_count,
            'creator': creator,
            'tags': tags,
            'formats': formats,
        }

View File

@@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
urljoin,
)
class MelonVODIE(InfoExtractor):
    """Extractor for vod.melon.com video detail pages.

    Metadata comes from ``playerInfo.json`` and the HLS stream location
    from ``streamingInfo.json``; both are queried with the ``mvId`` taken
    from the page URL.
    """
    _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?\?.*?mvId=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734',
        'info_dict': {
            'id': '50158734',
            'ext': 'mp4',
            'title': "Jessica 'Wonderland' MV Making Film",
            'thumbnail': r're:^https?://.*\.jpg$',
            'artist': 'Jessica (제시카)',
            'upload_date': '20161212',
            'duration': 203,
        },
        'params': {
            'skip_download': 'm3u8 download',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``."""
        video_id = self._match_id(url)

        play_info = self._download_json(
            'http://vod.melon.com/video/playerInfo.json', video_id,
            note='Downloading player info JSON', query={'mvId': video_id})

        title = play_info['mvInfo']['MVTITLE']

        info = self._download_json(
            'http://vod.melon.com/delivery/streamingInfo.json', video_id,
            note='Downloading streaming info JSON',
            query={
                'contsId': video_id,
                'contsType': 'VIDEO',
            })

        stream_info = info['streamingInfo']

        formats = self._extract_m3u8_formats(
            stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        # Join the names of all listed artists that carry one.
        artist_list = play_info.get('artistList')
        artist = None
        if isinstance(artist_list, list):
            artist = ', '.join(
                [a['ARTISTNAMEWEBLIST']
                 for a in artist_list if a.get('ARTISTNAMEWEBLIST')])

        thumbnail = urljoin(info.get('staticDomain'), stream_info.get('imgPath'))

        duration = int_or_none(stream_info.get('playTime'))
        # Only the first 8 characters are kept (the test expects a
        # YYYYMMDD value).  ``or ''`` also covers a key that is present
        # but null, which would otherwise fail on slicing None.
        upload_date = (stream_info.get('mvSvcOpenDt') or '')[:8] or None

        return {
            'id': video_id,
            'title': title,
            'artist': artist,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'formats': formats
        }

View File

@@ -22,7 +22,7 @@ from ..utils import (
class MixcloudIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
_VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
IE_NAME = 'mixcloud'
_TESTS = [{
@@ -51,6 +51,9 @@ class MixcloudIE(InfoExtractor):
'view_count': int,
'like_count': int,
},
}, {
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
'only_matching': True,
}]
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js

View File

@@ -78,11 +78,6 @@ class MSNIE(InfoExtractor):
m3u8_formats = self._extract_m3u8_formats(
format_url, display_id, 'mp4',
m3u8_id='hls', fatal=False)
# Despite metadata in m3u8 all video+audio formats are
# actually video-only (no audio)
for f in m3u8_formats:
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
f['acodec'] = 'none'
formats.extend(m3u8_formats)
else:
formats.append({

View File

@@ -9,6 +9,7 @@ from ..utils import (
lowercase_escape,
smuggle_url,
unescapeHTML,
update_url_query,
)
@@ -208,7 +209,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
'md5': 'af1adfa51312291a017720403826bb64',
'info_dict': {
'id': '269389891880',
'id': 'p_tweet_snow_140529',
'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
@@ -232,7 +233,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
'info_dict': {
'id': '394064451844',
'id': 'nn_netcast_150204',
'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
@@ -245,7 +246,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
'md5': 'a49e173825e5fcd15c13fc297fced39d',
'info_dict': {
'id': '529953347624',
'id': 'x_lon_vwhorn_150922',
'ext': 'mp4',
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
@@ -258,7 +259,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
'md5': '118d7ca3f0bea6534f119c68ef539f71',
'info_dict': {
'id': '669831235788',
'id': 'tdy_al_space_160420',
'ext': 'mp4',
'title': 'See the aurora borealis from space in stunning new NASA video',
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
@@ -271,7 +272,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
'id': '314487875924',
'id': 'n_hayes_Aimm_140801_272214',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
@@ -279,7 +280,6 @@ class NBCNewsIE(ThePlatformIE):
'timestamp': 1406937606,
'upload_date': '20140802',
'uploader': 'NBCU-NEWS',
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
},
},
{
@@ -311,28 +311,41 @@ class NBCNewsIE(ThePlatformIE):
else:
# "feature" and "nightly-news" pages use theplatform.com
video_id = mobj.group('mpx_id')
if not video_id.isdigit():
webpage = self._download_webpage(url, video_id)
info = None
bootstrap_json = self._search_regex(
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
webpage, 'bootstrap json', default=None)
webpage = self._download_webpage(url, video_id)
filter_param = 'byId'
bootstrap_json = self._search_regex(
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
webpage, 'bootstrap json', default=None)
if bootstrap_json:
bootstrap = self._parse_json(
bootstrap_json, video_id, transform_source=unescapeHTML)
info = None
if 'results' in bootstrap:
info = bootstrap['results'][0]['video']
elif 'video' in bootstrap:
info = bootstrap['video']
elif 'msnbcVideoInfo' in bootstrap:
info = bootstrap['msnbcVideoInfo']['meta']
elif 'msnbcThePlatform' in bootstrap:
info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
else:
info = bootstrap
video_id = info['mpxId']
if 'guid' in info:
video_id = info['guid']
filter_param = 'byGuid'
elif 'mpxId' in info:
video_id = info['mpxId']
return {
'_type': 'url_transparent',
'id': video_id,
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
'ie_key': 'ThePlatformFeed',
}

View File

@@ -48,6 +48,13 @@ class NRKBaseIE(InfoExtractor):
entries = []
conviva = data.get('convivaStatistics') or {}
live = (data.get('mediaElementType') == 'Live' or
data.get('isLive') is True or conviva.get('isLive'))
def make_title(t):
return self._live_title(t) if live else t
media_assets = data.get('mediaAssets')
if media_assets and isinstance(media_assets, list):
def video_id_and_title(idx):
@@ -61,6 +68,13 @@ class NRKBaseIE(InfoExtractor):
if not formats:
continue
self._sort_formats(formats)
# Some f4m streams may not work with hdcore in fragments' URLs
for f in formats:
extra_param = f.get('extra_param_to_segment_url')
if extra_param and 'hdcore' in extra_param:
del f['extra_param_to_segment_url']
entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration'))
subtitles = {}
@@ -72,7 +86,7 @@ class NRKBaseIE(InfoExtractor):
})
entries.append({
'id': asset.get('carrierId') or entry_id,
'title': entry_title,
'title': make_title(entry_title),
'duration': duration,
'subtitles': subtitles,
'formats': formats,
@@ -87,7 +101,7 @@ class NRKBaseIE(InfoExtractor):
duration = parse_duration(data.get('duration'))
entries = [{
'id': video_id,
'title': title,
'title': make_title(title),
'duration': duration,
'formats': formats,
}]
@@ -111,7 +125,6 @@ class NRKBaseIE(InfoExtractor):
message_type, message_type)),
expected=True)
conviva = data.get('convivaStatistics') or {}
series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
@@ -260,6 +273,19 @@ class NRKTVIE(NRKBaseIE):
}]
class NRKTVDirekteIE(NRKTVIE):
    # Matches the "direkte" pages on both the tv. and radio. subdomains;
    # everything else is inherited from NRKTVIE.
    _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
    IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
    # URL-matching checks only, one per subdomain.
    _TESTS = [
        {
            'url': 'https://tv.nrk.no/direkte/nrk1',
            'only_matching': True,
        },
        {
            'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
            'only_matching': True,
        },
    ]
class NRKPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'

View File

@@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .jwplatform import JWPlatformBaseIE
from ..utils import (
ExtractorError,
js_to_json,
)
class OnDemandKoreaIE(JWPlatformBaseIE):
    """Extractor for ondemandkorea.com episode pages.

    The page embeds a JW Player ``setup(...)`` call; its options are parsed
    and handed to ``_parse_jwplayer_data``.  Title and thumbnail are taken
    from the page's Open Graph tags.
    """
    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
    _TEST = {
        'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
        'info_dict': {
            'id': 'ask-us-anything-e43',
            'ext': 'mp4',
            'title': 'Ask Us Anything : E43',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': 'm3u8 download'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the episode page at ``url``.

        Raises ExtractorError (expected) when the page cannot be fetched
        or is restricted to ODK PLUS members, and reports a geo
        restriction when the block image is present in the page.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, fatal=False)

        if not webpage:
            # Page sometimes returns captcha page with HTTP 403
            raise ExtractorError(
                'Unable to access page. You may have been blocked.',
                expected=True)

        if 'msg_block_01.png' in webpage:
            self.raise_geo_restricted(
                'This content is not available in your region')

        if 'This video is only available to ODK PLUS members.' in webpage:
            raise ExtractorError(
                'This video is only available to ODK PLUS members.',
                expected=True)

        title = self._og_search_title(webpage)

        # The setup() argument is a JavaScript object literal, so it is
        # converted with js_to_json before being parsed.
        jw_config = self._parse_json(
            self._search_regex(
                r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P<options>.+?)\);',
                webpage, 'jw config', group='options'),
            video_id, transform_source=js_to_json)
        info = self._parse_jwplayer_data(
            jw_config, video_id, require_title=False, m3u8_id='hls',
            base_url=url)

        info.update({
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
        })
        return info

View File

@@ -10,7 +10,7 @@ from ..utils import (
class OpenloadIE(InfoExtractor):
_VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -51,6 +51,9 @@ class OpenloadIE(InfoExtractor):
# for title and ext
'url': 'https://openload.co/embed/Sxz5sADo82g/',
'only_matching': True,
}, {
'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -350,6 +350,15 @@ class PBSIE(InfoExtractor):
410: 'This video has expired and is no longer available for online streaming.',
}
def _real_initialize(self):
    """Look up the localized station and store it as a pbs.org cookie.

    The localization service is queried with the geo verification
    headers.  Every step is best-effort: a failed download, a response
    without a cookie, or a cookie without a station leaves state untouched.
    """
    cookie = (self._download_json(
        'http://localization.services.pbs.org/localize/auto/cookie/',
        None, headers=self.geo_verification_headers(), fatal=False) or {}).get('cookie')
    if not cookie:
        return
    # default=None makes the search non-fatal, so a cookie that lacks the
    # station list is skipped instead of aborting initialization.
    station = self._search_regex(
        r'#?s=\["([^"]+)"', cookie, 'station', default=None)
    if station:
        self._set_cookie('.pbs.org', 'pbsol.station', station)
def _extract_webpage(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -476,7 +485,8 @@ class PBSIE(InfoExtractor):
redirect_info = self._download_json(
'%s?format=json' % redirect['url'], display_id,
'Downloading %s video url info' % (redirect_id or num))
'Downloading %s video url info' % (redirect_id or num),
headers=self.geo_verification_headers())
if redirect_info['status'] == 'error':
raise ExtractorError(

View File

@@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
dict_get,
int_or_none,
unescapeHTML,
parse_iso8601,
)
class PikselIE(InfoExtractor):
    """Extractor for player.piksel.com videos.

    The player page supplies an app token used to query the
    ``ws_program`` JSON API, which lists an HLS manifest and direct HTTP
    asset files.
    """
    _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
    _TEST = {
        'url': 'http://player.piksel.com/v/nv60p12f',
        'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
        'info_dict': {
            'id': 'nv60p12f',
            'ext': 'mp4',
            'title': 'فن الحياة - الحلقة 1',
            'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
            'timestamp': 1465231790,
            'upload_date': '20160606',
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first Piksel player iframe URL in ``webpage``, or None."""
        mobj = re.search(
            r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Return the info dict for the player page at ``url``.

        Raises ExtractorError (expected) with the API's reason when the
        response reports a failure.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        app_token = self._search_regex(
            r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token')
        response = self._download_json(
            'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
            video_id, query={
                'v': video_id
            })['response']
        failure = response.get('failure')
        if failure:
            raise ExtractorError(failure['reason'], expected=True)
        video_data = response['WsProgramResponse']['program']['asset']
        title = video_data['title']

        formats = []

        # The manifest URL may be published under any of these keys.
        m3u8_url = dict_get(video_data, [
            'm3u8iPadURL',
            'ipadM3u8Url',
            'm3u8AndroidURL',
            'm3u8iPhoneURL',
            'iphoneM3u8Url'])
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        asset_type = dict_get(video_data, ['assetType', 'asset_type'])
        # ``or []`` also covers an explicit null in the response.
        for asset_file in video_data.get('assetFiles') or []:
            # TODO: extract rtmp formats
            http_url = asset_file.get('http_url')
            if not http_url:
                continue
            tbr = None
            # NOTE(review): 1024 is passed as int_or_none's second
            # argument, presumably a scale to kbit/s - confirm.
            vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
            abr = int_or_none(asset_file.get('audioBitrate'), 1024)
            if asset_type == 'video':
                # Either bitrate may be missing; only sum when both are
                # known so a partial record does not raise TypeError.
                if vbr is not None and abr is not None:
                    tbr = vbr + abr
            elif asset_type == 'audio':
                tbr = abr

            format_id = ['http']
            if tbr:
                format_id.append(compat_str(tbr))

            formats.append({
                'format_id': '-'.join(format_id),
                'url': unescapeHTML(http_url),
                'vbr': vbr,
                'abr': abr,
                'width': int_or_none(asset_file.get('videoWidth')),
                'height': int_or_none(asset_file.get('videoHeight')),
                'filesize': int_or_none(asset_file.get('filesize')),
                'tbr': tbr,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnailUrl'),
            'timestamp': parse_iso8601(video_data.get('dateadd')),
            'formats': formats,
        }

View File

@@ -85,6 +85,9 @@ class ProSiebenSat1BaseIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
source_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif mimetype == 'application/dash+xml':
formats.extend(self._extract_mpd_formats(
source_url, clip_id, mpd_id='dash', fatal=False))
else:
tbr = fix_bitrate(source['bitrate'])
if protocol in ('rtmp', 'rtmpe'):

View File

@@ -4,118 +4,31 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
float_or_none,
parse_iso8601,
unescapeHTML,
ExtractorError,
)
class RteIE(InfoExtractor):
IE_NAME = 'rte'
IE_DESC = 'Raidió Teilifís Éireann TV'
_VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
'info_dict': {
'id': '10478715',
'ext': 'flv',
'title': 'Watch iWitness online',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.',
'duration': 60.046,
},
'params': {
'skip_download': 'f4m fails with --test atm'
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._html_search_meta('description', webpage, 'description')
duration = float_or_none(self._html_search_meta(
'duration', webpage, 'duration', fatal=False), 1000)
thumbnail = None
thumbnail_meta = self._html_search_meta('thumbnail', webpage)
if thumbnail_meta:
thumbnail_id = self._search_regex(
r'uri:irus:(.+)', thumbnail_meta,
'thumbnail id', fatal=False)
if thumbnail_id:
thumbnail = 'http://img.rasset.ie/%s.jpg' % thumbnail_id
feeds_url = self._html_search_meta('feeds-prefix', webpage, 'feeds url') + video_id
json_string = self._download_json(feeds_url, video_id)
# f4m_url = server + relative_url
f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
self._sort_formats(f4m_formats)
return {
'id': video_id,
'title': title,
'formats': f4m_formats,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
}
class RteRadioIE(InfoExtractor):
IE_NAME = 'rte:radio'
IE_DESC = 'Raidió Teilifís Éireann radio'
# Radioplayer URLs have two distinct specifier formats,
# the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
# the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
# An <id> uniquely defines an individual recording, and is the only part we require.
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
_TESTS = [{
# Old-style player URL; HLS and RTMPE formats
'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
'info_dict': {
'id': '10507902',
'ext': 'mp4',
'title': 'Gloria',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
'timestamp': 1451203200,
'upload_date': '20151227',
'duration': 7230.0,
},
'params': {
'skip_download': 'f4m fails with --test atm'
}
}, {
# New-style player URL; RTMPE formats only
'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
'info_dict': {
'id': '3250678',
'ext': 'flv',
'title': 'The Lyric Concert with Paul Herriott',
'thumbnail': 're:^https?://.*\.jpg$',
'description': '',
'timestamp': 1333742400,
'upload_date': '20120406',
'duration': 7199.016,
},
'params': {
'skip_download': 'f4m fails with --test atm'
}
}]
class RteBaseIE(InfoExtractor):
def _real_extract(self, url):
item_id = self._match_id(url)
json_string = self._download_json(
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
item_id)
try:
json_string = self._download_json(
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
item_id)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
if error_info:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error_info['message']),
expected=True)
raise
# NB the string values in the JSON are stored using XML escaping(!)
show = json_string['shows'][0]
@@ -163,3 +76,67 @@ class RteRadioIE(InfoExtractor):
'duration': duration,
'formats': formats,
}
class RteIE(RteBaseIE):
    # RTÉ TV player pages; the extraction logic is inherited from RteBaseIE.
    IE_NAME = 'rte'
    IE_DESC = 'Raidió Teilifís Éireann TV'
    _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
        'md5': '4a76eb3396d98f697e6e8110563d2604',
        'info_dict': {
            'id': '10478715',
            'ext': 'mp4',
            'title': 'iWitness',
            # Raw string: '\.' is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'The spirit of Ireland, one voice and one minute at a time.',
            'duration': 60.046,
            'upload_date': '20151012',
            'timestamp': 1444694160,
        },
    }
class RteRadioIE(RteBaseIE):
    # RTÉ radio player URLs; the extraction logic is inherited from RteBaseIE.
    IE_NAME = 'rte:radio'
    IE_DESC = 'Raidió Teilifís Éireann radio'
    # Radioplayer URLs have two distinct specifier formats,
    # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
    # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
    # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
    # An <id> uniquely defines an individual recording, and is the only part we require.
    _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'

    _TESTS = [{
        # Old-style player URL; HLS and RTMPE formats
        'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
        'md5': 'c79ccb2c195998440065456b69760411',
        'info_dict': {
            'id': '10507902',
            'ext': 'mp4',
            'title': 'Gloria',
            # Raw string: '\.' is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
            'timestamp': 1451203200,
            'upload_date': '20151227',
            'duration': 7230.0,
        },
    }, {
        # New-style player URL; RTMPE formats only
        'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
        'info_dict': {
            'id': '3250678',
            'ext': 'flv',
            'title': 'The Lyric Concert with Paul Herriott',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': '',
            'timestamp': 1333742400,
            'upload_date': '20120406',
            'duration': 7199.016,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }]

View File

@@ -2,7 +2,9 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class RTL2IE(InfoExtractor):
@@ -13,7 +15,7 @@ class RTL2IE(InfoExtractor):
'id': 'folge-203-0',
'ext': 'f4v',
'title': 'GRIP sucht den Sommerkönig',
'description': 'Matthias, Det und Helge treten gegeneinander an.'
'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f'
},
'params': {
# rtmp download
@@ -25,7 +27,7 @@ class RTL2IE(InfoExtractor):
'id': '21040-anna-erwischt-alex',
'ext': 'mp4',
'title': 'Anna erwischt Alex!',
'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
},
'params': {
# rtmp download
@@ -52,34 +54,47 @@ class RTL2IE(InfoExtractor):
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
vivi_id = self._html_search_regex(
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
info = self._download_json(info_url, video_id)
info = self._download_json(
'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php',
video_id, query={
'vico_id': vico_id,
'vivi_id': vivi_id,
})
video_info = info['video']
title = video_info['titel']
description = video_info.get('beschreibung')
thumbnail = video_info.get('image')
download_url = video_info['streamurl']
download_url = download_url.replace('\\', '')
stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL')
rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
formats = []
rtmp_url = video_info.get('streamurl')
if rtmp_url:
rtmp_url = rtmp_url.replace('\\', '')
stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL')
rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
formats.append({
'format_id': 'rtmp',
'url': rtmp_url,
'play_path': stream_url,
'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
'page_url': url,
'flash_version': 'LNX 11,2,202,429',
'rtmp_conn': rtmp_conn,
'no_resume': True,
'preference': 1,
})
m3u8_url = video_info.get('streamurl_hls')
if m3u8_url:
formats.extend(self._extract_akamai_formats(m3u8_url, video_id))
formats = [{
'url': download_url,
'play_path': stream_url,
'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
'page_url': url,
'flash_version': 'LNX 11,2,202,429',
'rtmp_conn': rtmp_conn,
'no_resume': True,
}]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'description': description,
'thumbnail': video_info.get('image'),
'description': video_info.get('beschreibung'),
'duration': int_or_none(video_info.get('duration')),
'formats': formats,
}

View File

@@ -4,27 +4,24 @@ from __future__ import unicode_literals
import re
from .srgssr import SRGSSRIE
from ..compat import (
compat_str,
compat_urllib_parse_urlparse,
)
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
unescapeHTML,
xpath_text,
determine_ext,
)
class RTSIE(SRGSSRIE):
IE_DESC = 'RTS.ch'
_VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
_VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
_TESTS = [
{
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
'md5': 'f254c4b26fb1d3c183793d52bc40d3e7',
'md5': 'ff7f8450a90cf58dacb64e29707b4a8e',
'info_dict': {
'id': '3449373',
'display_id': 'les-enfants-terribles',
@@ -38,35 +35,17 @@ class RTSIE(SRGSSRIE):
'thumbnail': 're:^https?://.*\.image',
'view_count': int,
},
'params': {
# m3u8 download
'skip_download': True,
}
},
{
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
'md5': 'f1077ac5af686c76528dc8d7c5df29ba',
'info_dict': {
'id': '5742494',
'display_id': '5742494',
'ext': 'mp4',
'duration': 3720,
'title': 'Les yeux dans les cieux - Mon homard au Canada',
'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
'uploader': 'Passe-moi les jumelles',
'upload_date': '20140404',
'timestamp': 1396635300,
'thumbnail': 're:^https?://.*\.image',
'view_count': int,
'id': '5624065',
'title': 'Passe-moi les jumelles',
},
'params': {
# m3u8 download
'skip_download': True,
}
'playlist_mincount': 4,
},
{
'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
'info_dict': {
'id': '5745975',
'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
@@ -80,11 +59,15 @@ class RTSIE(SRGSSRIE):
'thumbnail': 're:^https?://.*\.image',
'view_count': int,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Blocked outside Switzerland',
},
{
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
'md5': '9f713382f15322181bb366cc8c3a4ff0',
'md5': '1bae984fe7b1f78e94abc74e802ed99f',
'info_dict': {
'id': '5745356',
'display_id': 'londres-cachee-par-un-epais-smog',
@@ -92,16 +75,12 @@ class RTSIE(SRGSSRIE):
'duration': 33,
'title': 'Londres cachée par un épais smog',
'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
'uploader': 'Le Journal en continu',
'uploader': 'L\'actu en vidéo',
'upload_date': '20140403',
'timestamp': 1396537322,
'thumbnail': 're:^https?://.*\.image',
'view_count': int,
},
'params': {
# m3u8 download
'skip_download': True,
}
},
{
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
@@ -125,6 +104,10 @@ class RTSIE(SRGSSRIE):
'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
},
'playlist_mincount': 5,
},
{
'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html',
'only_matching': True,
}
]
@@ -142,19 +125,32 @@ class RTSIE(SRGSSRIE):
# media_id extracted out of URL is not always a real id
if 'video' not in all_info and 'audio' not in all_info:
page = self._download_webpage(url, display_id)
entries = []
# article with videos on rhs
videos = re.findall(
r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
page)
if not videos:
for item in all_info.get('items', []):
item_url = item.get('url')
if not item_url:
continue
entries.append(self.url_result(item_url, 'RTS'))
if not entries:
page, urlh = self._download_webpage_handle(url, display_id)
if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id:
return self.url_result(urlh.geturl(), 'RTS')
# article with videos on rhs
videos = re.findall(
r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
page)
if videos:
entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]
return self.playlist_result(entries, media_id, self._og_search_title(page))
if not videos:
videos = re.findall(
r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
page)
if videos:
entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]
if entries:
return self.playlist_result(entries, media_id, all_info.get('title'))
internal_id = self._html_search_regex(
r'<(?:video|audio) data-id="([0-9]+)"', page,
@@ -168,36 +164,29 @@ class RTSIE(SRGSSRIE):
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
upload_timestamp = parse_iso8601(info.get('broadcast_date'))
duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
if isinstance(duration, compat_str):
duration = parse_duration(duration)
view_count = info.get('plays')
thumbnail = unescapeHTML(info.get('preview_image_url'))
title = info['title']
def extract_bitrate(url):
return int_or_none(self._search_regex(
r'-([0-9]+)k\.', url, 'bitrate', default=None))
formats = []
for format_id, format_url in info['streams'].items():
if format_id == 'hds_sd' and 'hds' in info['streams']:
streams = info.get('streams', {})
for format_id, format_url in streams.items():
if format_id == 'hds_sd' and 'hds' in streams:
continue
if format_id == 'hls_sd' and 'hls' in info['streams']:
if format_id == 'hls_sd' and 'hls' in streams:
continue
if format_url.endswith('.f4m'):
token = self._download_xml(
'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path,
media_id, 'Downloading %s token' % format_id)
auth_params = xpath_text(token, './/authparams', 'auth params')
if not auth_params:
continue
formats.extend(self._extract_f4m_formats(
'%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params),
media_id, f4m_id=format_id, fatal=False))
elif format_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
ext = determine_ext(format_url)
if ext in ('m3u8', 'f4m'):
format_url = self._get_tokenized_src(format_url, media_id, format_id)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0',
media_id, f4m_id=format_id, fatal=False))
else:
formats.extend(self._extract_m3u8_formats(
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
else:
formats.append({
'format_id': format_id,
@@ -205,25 +194,37 @@ class RTSIE(SRGSSRIE):
'tbr': extract_bitrate(format_url),
})
if 'media' in info:
formats.extend([{
'format_id': '%s-%sk' % (media['ext'], media['rate']),
'url': 'http://download-video.rts.ch/%s' % media['url'],
'tbr': media['rate'] or extract_bitrate(media['url']),
} for media in info['media'] if media.get('rate')])
for media in info.get('media', []):
media_url = media.get('url')
if not media_url or re.match(r'https?://', media_url):
continue
rate = media.get('rate')
ext = media.get('ext') or determine_ext(media_url, 'mp4')
format_id = ext
if rate:
format_id += '-%dk' % rate
formats.append({
'format_id': format_id,
'url': 'http://download-video.rts.ch/' + media_url,
'tbr': rate or extract_bitrate(media_url),
})
self._check_formats(formats, media_id)
self._sort_formats(formats)
duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
if isinstance(duration, compat_str):
duration = parse_duration(duration)
return {
'id': media_id,
'display_id': display_id,
'formats': formats,
'title': info['title'],
'title': title,
'description': info.get('intro'),
'duration': duration,
'view_count': view_count,
'view_count': int_or_none(info.get('plays')),
'uploader': info.get('programName'),
'timestamp': upload_timestamp,
'thumbnail': thumbnail,
'timestamp': parse_iso8601(info.get('broadcast_date')),
'thumbnail': unescapeHTML(info.get('preview_image_url')),
}

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
parse_iso8601,
@@ -23,6 +24,16 @@ class SRGSSRIE(InfoExtractor):
'STARTDATE': 'This video is not yet available. Please try again later.',
}
def _get_tokenized_src(self, url, video_id, format_id):
    """Return ``url`` with auth params from the token service appended.

    The first two path segments of ``url`` form the ACL that is sent to
    ``tp.srgssr.ch``.  Fetching the token is best-effort (``fatal=False``):
    when no auth params can be obtained the URL is returned unchanged.

    ``format_id`` is only used in the progress note.
    """
    path_parts = compat_urllib_parse_urlparse(url).path.split('/')
    if len(path_parts) < 3:
        # Not enough path segments to build the ACL; stay best-effort
        # instead of failing with an IndexError.
        return url
    token = self._download_json(
        'http://tp.srgssr.ch/akahd/token?acl=/%s/%s/*' % (path_parts[1], path_parts[2]),
        video_id, 'Downloading %s token' % format_id, fatal=False) or {}
    # 'token' may be missing or null in the response.
    auth_params = (token.get('token') or {}).get('authparams')
    if auth_params:
        # Extend an existing query string with '&' rather than adding a
        # second '?', which would produce an invalid URL.
        url += ('&' if '?' in url else '?') + auth_params
    return url
def get_media_data(self, bu, media_type, media_id):
media_data = self._download_json(
'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id),
@@ -61,14 +72,16 @@ class SRGSSRIE(InfoExtractor):
asset_url = asset['text']
quality = asset['@quality']
format_id = '%s-%s' % (protocol, quality)
if protocol == 'HTTP-HDS':
formats.extend(self._extract_f4m_formats(
asset_url + '?hdcore=3.4.0', media_id,
f4m_id=format_id, fatal=False))
elif protocol == 'HTTP-HLS':
formats.extend(self._extract_m3u8_formats(
asset_url, media_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False))
if protocol.startswith('HTTP-HDS') or protocol.startswith('HTTP-HLS'):
asset_url = self._get_tokenized_src(asset_url, media_id, format_id)
if protocol.startswith('HTTP-HDS'):
formats.extend(self._extract_f4m_formats(
asset_url + ('?' if '?' not in asset_url else '&') + 'hdcore=3.4.0',
media_id, f4m_id=format_id, fatal=False))
elif protocol.startswith('HTTP-HLS'):
formats.extend(self._extract_m3u8_formats(
asset_url, media_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False))
else:
formats.append({
'format_id': format_id,
@@ -94,10 +107,10 @@ class SRGSSRPlayIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'md5': '4cd93523723beff51bb4bee974ee238d',
'md5': 'da6b5b3ac9fa4761a942331cef20fcb3',
'info_dict': {
'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'ext': 'm4v',
'ext': 'mp4',
'upload_date': '20130701',
'title': 'Snowden beantragt Asyl in Russland',
'timestamp': 1372713995,

View File

@@ -33,7 +33,9 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformBaseIE(OnceIE):
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
meta = self._download_xml(
smil_url, video_id, note=note, query={'format': 'SMIL'},
headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
if error_element is not None and error_element.attrib['src'].startswith(
'http://link.theplatform.com/s/errorFiles/Unavailable.'):

View File

@@ -300,7 +300,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
response = self._call_api(
self._PLAYLIST_PATH % (channel_id, offset, limit),
channel_id,
'Downloading %s videos JSON page %s'
'Downloading %s JSON page %s'
% (self._PLAYLIST_TYPE, counter_override or counter))
page_entries = self._extract_playlist_page(response)
if not page_entries:
@@ -350,19 +350,72 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
}
class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
IE_NAME = 'twitch:past_broadcasts'
_VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
_PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true'
_PLAYLIST_TYPE = 'past broadcasts'
class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
    # Shared stem for the per-channel "videos" listings: subclasses append
    # the page name to the URL pattern and the broadcast type value(s) to
    # the playlist path.
    _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type='
    _VALID_URL_VIDEOS_BASE = r'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE
class TwitchAllVideosIE(TwitchVideosBaseIE):
IE_NAME = 'twitch:videos:all'
_VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
_PLAYLIST_TYPE = 'all videos'
_TEST = {
'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
'url': 'https://www.twitch.tv/spamfish/videos/all',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
'playlist_mincount': 54,
'playlist_mincount': 869,
}
class TwitchUploadsIE(TwitchVideosBaseIE):
    # Channel listing restricted to broadcast_type=upload.
    IE_NAME = 'twitch:videos:uploads'
    _PLAYLIST_TYPE = 'uploads'
    _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
    _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE

    _TEST = {
        'url': 'https://www.twitch.tv/spamfish/videos/uploads',
        'info_dict': {
            'id': 'spamfish',
            'title': 'Spamfish',
        },
        'playlist_mincount': 0,
    }
class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
    # Channel listing restricted to broadcast_type=archive.
    IE_NAME = 'twitch:videos:past-broadcasts'
    _PLAYLIST_TYPE = 'past broadcasts'
    _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
    _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE

    _TEST = {
        'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
        'info_dict': {
            'id': 'spamfish',
            'title': 'Spamfish',
        },
        'playlist_mincount': 0,
    }
class TwitchHighlightsIE(TwitchVideosBaseIE):
    # Channel listing restricted to broadcast_type=highlight.
    IE_NAME = 'twitch:videos:highlights'
    _PLAYLIST_TYPE = 'highlights'
    _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
    _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE

    _TEST = {
        'url': 'https://www.twitch.tv/spamfish/videos/highlights',
        'info_dict': {
            'id': 'spamfish',
            'title': 'Spamfish',
        },
        'playlist_mincount': 805,
    }

View File

@@ -0,0 +1,33 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class UKTVPlayIE(InfoExtractor):
    # Thin extractor: takes the numeric ``video`` query parameter from a
    # uktvplay.uktv.co.uk URL and delegates to the BrightcoveNew extractor.
    _VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/.+?\?.*?\bvideo=(?P<id>\d+)'
    _TEST = {
        'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
        'md5': '',
        'info_dict': {
            'id': '2117008346001',
            'ext': 'mp4',
            'title': 'Pincers',
            'description': 'Pincers',
            'uploader_id': '1242911124001',
            'upload_date': '20130124',
            'timestamp': 1359049267,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Failed to download MPD manifest']
    }
    # Player page that receives the extracted video id as ``videoId``.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Hand the video over to BrightcoveNew via a url_result."""
        video_id = self._match_id(url)
        brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % video_id
        return self.url_result(brightcove_url, 'BrightcoveNew', video_id)

View File

@@ -30,7 +30,9 @@ class UplynkIE(InfoExtractor):
def _extract_uplynk_info(self, uplynk_content_url):
path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
display_id = video_id or external_id
formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4')
formats = self._extract_m3u8_formats(
'http://content.uplynk.com/%s.m3u8' % path,
display_id, 'mp4', 'm3u8_native')
if session_id:
for f in formats:
f['extra_param_to_segment_url'] = 'pbs=' + session_id

View File

@@ -4,11 +4,22 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import urlencode_postdata
from ..utils import ExtractorError
class Vbox7IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P<id>[\da-fA-F]+)'
_VALID_URL = r'''(?x)
https?://
(?:[^/]+\.)?vbox7\.com/
(?:
play:|
(?:
emb/external\.php|
player/ext\.swf
)\?.*?\bvid=
)
(?P<id>[\da-fA-F]+)
'''
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -16,6 +27,14 @@ class Vbox7IE(InfoExtractor):
'id': '0946fff23c',
'ext': 'mp4',
'title': 'Борисов: Притеснен съм за бъдещето на България',
'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1470982814,
'upload_date': '20160812',
'uploader': 'zdraveibulgaria',
},
'params': {
'proxy': '127.0.0.1:8118',
},
}, {
'url': 'http://vbox7.com/play:249bb972c2',
@@ -29,6 +48,9 @@ class Vbox7IE(InfoExtractor):
}, {
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
'only_matching': True,
}, {
'url': 'http://i49.vbox7.com/player/ext.swf?vid=0946fff23c&autoplay=1',
'only_matching': True,
}]
@staticmethod
@@ -42,33 +64,41 @@ class Vbox7IE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://vbox7.com/play:%s' % video_id, video_id)
response = self._download_json(
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
video_id)
title = self._html_search_regex(
r'<title>(.+?)</title>', webpage, 'title').split('/')[0].strip()
if 'error' in response:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
video_url = self._search_regex(
r'src\s*:\s*(["\'])(?P<url>.+?.mp4.*?)\1',
webpage, 'video url', default=None, group='url')
video = response['options']
thumbnail_url = self._og_search_thumbnail(webpage)
if not video_url:
info_response = self._download_webpage(
'http://vbox7.com/play/magare.do', video_id,
'Downloading info webpage',
data=urlencode_postdata({'as3': '1', 'vid': video_id}),
headers={'Content-Type': 'application/x-www-form-urlencoded'})
final_url, thumbnail_url = map(
lambda x: x.split('=')[1], info_response.split('&'))
title = video['title']
video_url = video['src']
if '/na.mp4' in video_url:
self.raise_geo_restricted()
return {
uploader = video.get('uploader')
webpage = self._download_webpage(
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
info = {}
if webpage:
info = self._search_json_ld(
webpage.replace('"/*@context"', '"@context"'), video_id,
fatal=False)
info.update({
'id': video_id,
'url': self._proto_relative_url(video_url, 'http:'),
'title': title,
'thumbnail': thumbnail_url,
}
'url': video_url,
'uploader': uploader,
'thumbnail': self._proto_relative_url(
info.get('thumbnail') or self._og_search_thumbnail(webpage),
'http:'),
})
return info

View File

@@ -92,29 +92,30 @@ class VimeoBaseInfoExtractor(InfoExtractor):
def _vimeo_sort_formats(self, formats):
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
# at the same time without actual units specified. This lead to wrong sorting.
self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
def _parse_config(self, config, video_id):
video_data = config['video']
# Extract title
video_title = config['video']['title']
video_title = video_data['title']
# Extract uploader, uploader_url and uploader_id
video_uploader = config['video'].get('owner', {}).get('name')
video_uploader_url = config['video'].get('owner', {}).get('url')
video_uploader = video_data.get('owner', {}).get('name')
video_uploader_url = video_data.get('owner', {}).get('url')
video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
# Extract video thumbnail
video_thumbnail = config['video'].get('thumbnail')
video_thumbnail = video_data.get('thumbnail')
if video_thumbnail is None:
video_thumbs = config['video'].get('thumbs')
video_thumbs = video_data.get('thumbs')
if video_thumbs and isinstance(video_thumbs, dict):
_, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
# Extract video duration
video_duration = int_or_none(config['video'].get('duration'))
video_duration = int_or_none(video_data.get('duration'))
formats = []
config_files = config['video'].get('files') or config['request'].get('files', {})
config_files = video_data.get('files') or config['request'].get('files', {})
for f in config_files.get('progressive', []):
video_url = f.get('url')
if not video_url:
@@ -127,10 +128,24 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'fps': int_or_none(f.get('fps')),
'tbr': int_or_none(f.get('bitrate')),
})
m3u8_url = config_files.get('hls', {}).get('url')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
manifest_url = cdn_data.get('url')
if not manifest_url:
continue
format_id = '%s-%s' % (files_type, cdn_name)
if files_type == 'hls':
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4',
'm3u8_native', m3u8_id=format_id,
note='Downloading %s m3u8 information' % cdn_name,
fatal=False))
elif files_type == 'dash':
formats.extend(self._extract_mpd_formats(
manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id,
'Downloading %s MPD information' % cdn_name,
fatal=False))
subtitles = {}
text_tracks = config['request'].get('text_tracks')

249
youtube_dl/extractor/viu.py Normal file
View File

@@ -0,0 +1,249 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
)
class ViuBaseIE(InfoExtractor):
    # Shared plumbing for the www.viu.com API: a guest authentication step
    # and a helper for authenticated JSON calls.

    def _real_initialize(self):
        """Request guest credentials and remember the auth token.

        The token is read from the ``X-VIU-AUTH`` response header.
        """
        auth_query = {
            'acct': 'test',
            'appid': 'viu_desktop',
            'fmt': 'json',
            'iid': 'guest',
            'languageid': 'default',
            'platform': 'desktop',
            'userid': 'guest',
            'useridtype': 'guest',
            'ver': '1.0'
        }
        auth_response = self._request_webpage(
            'https://www.viu.com/api/apps/v2/authenticate', None,
            'Requesting Viu auth', query=auth_query,
            headers=self.geo_verification_headers())
        self._auth_token = auth_response.info()['X-VIU-AUTH']

    def _call_api(self, path, *args, **kwargs):
        """Download ``path`` from the Viu API and return its ``response`` object.

        Geo verification headers and the auth token are added to the request;
        caller-supplied ``headers`` take precedence over both.  Raises an
        expected ExtractorError carrying the API message when the response
        status is not ``success``.
        """
        request_headers = self.geo_verification_headers()
        request_headers['X-VIU-AUTH'] = self._auth_token
        request_headers.update(kwargs.get('headers', {}))
        kwargs['headers'] = request_headers

        payload = self._download_json(
            'https://www.viu.com/api/' + path, *args, **kwargs)
        response = payload['response']
        if response.get('status') == 'success':
            return response
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, response['message']),
            expected=True)
class ViuIE(ViuBaseIE):
    _VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
        'info_dict': {
            'id': '1116705532',
            'ext': 'mp4',
            'title': 'Citizen Khan - Ep 1',
            'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
        'skip': 'Geo-restricted to India',
    }, {
        'url': 'https://www.viu.com/en/media/1130599965',
        'info_dict': {
            'id': '1130599965',
            'ext': 'mp4',
            'title': 'Jealousy Incarnate - Episode 1',
            'description': 'md5:d3d82375cab969415d2720b6894361e9',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
        'skip': 'Geo-restricted to Indonesia',
    }]

    def _real_extract(self, url):
        """Extract a single clip: HLS formats plus any ``subtitle_*`` tracks."""
        video_id = self._match_id(url)

        clip_query = {
            'appid': 'viu_desktop',
            'fmt': 'json',
            'id': video_id
        }
        clip_response = self._call_api(
            'clip/load', video_id, 'Downloading video data', query=clip_query)
        video_data = clip_response['item'][0]

        title = video_data['title']

        url_path = video_data.get('urlpathd') or video_data.get('urlpath')
        tdirforwhole = video_data.get('tdirforwhole')
        # #EXT-X-BYTERANGE is not supported by native hls downloader
        # and ffmpeg (#10955)
        # hls_file = video_data.get('hlsfile')
        hls_file = video_data.get('jwhlsfile')
        if not (url_path and tdirforwhole and hls_file):
            # Fall back to the manifest URL given directly in the metadata.
            # m3u8_url = re.sub(
            #     r'(/hlsc_)[a-z]+(\d+\.m3u8)',
            #     r'\1whe\2', video_data['href'])
            m3u8_url = video_data['href']
        else:
            m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file)

        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
        self._sort_formats(formats)

        # Subtitle URLs are stored under keys of the form
        # subtitle_<lang>_<ext> in the clip metadata.
        subtitle_key_re = r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))'
        subtitles = {}
        for field_name, field_value in video_data.items():
            key_match = re.match(subtitle_key_re, field_name)
            if key_match:
                subtitles.setdefault(key_match.group('lang'), []).append({
                    'url': field_value,
                    'ext': key_match.group('ext')
                })

        info = {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'series': video_data.get('moviealbumshowname'),
            'episode': title,
            'episode_number': int_or_none(video_data.get('episodeno')),
            'duration': int_or_none(video_data.get('duration')),
            'formats': formats,
            'subtitles': subtitles,
        }
        return info
class ViuPlaylistIE(ViuBaseIE):
    IE_NAME = 'viu:playlist'
    _VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.viu.com/en/listing/playlist-22461380',
        'info_dict': {
            'id': '22461380',
            'title': 'The Good Wife',
        },
        'playlist_count': 16,
        'skip': 'Geo-restricted to Indonesia',
    }

    def _real_extract(self, url):
        """Build a playlist of ``viu:<id>`` entries from a container listing."""
        playlist_id = self._match_id(url)

        container_query = {
            'appid': 'viu_desktop',
            'fmt': 'json',
            'id': 'playlist-' + playlist_id
        }
        container_response = self._call_api(
            'container/load', playlist_id,
            'Downloading playlist info', query=container_query)
        playlist_data = container_response['container']

        entries = []
        for raw_id in (item.get('id') for item in playlist_data.get('item', [])):
            # Items without an id cannot be resolved and are skipped.
            if raw_id:
                entry_id = compat_str(raw_id)
                entries.append(
                    self.url_result('viu:' + entry_id, 'Viu', entry_id))

        playlist_title = playlist_data.get('title')
        return self.playlist_result(entries, playlist_id, playlist_title)
class ViuOTTIE(InfoExtractor):
    IE_NAME = 'viu:ott'
    _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
        'info_dict': {
            'id': '3421',
            'ext': 'mp4',
            'title': 'A New Beginning',
            'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
        'skip': 'Geo-restricted to Singapore',
    }, {
        'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90',
        'info_dict': {
            'id': '7123',
            'ext': 'mp4',
            'title': '這就是我的生活之道',
            'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
        'skip': 'Geo-restricted to Hong Kong',
    }]

    def _real_extract(self, url):
        """Extract a VOD item: product metadata first, then its streams.

        Raises an expected ExtractorError when the product data carries no
        ``current_product`` entry.
        """
        url_match = re.match(self._VALID_URL, url)
        country_code = url_match.group('country_code')
        video_id = url_match.group('id')

        detail_response = self._download_json(
            'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
            'Downloading video info', query={
                'r': 'vod/ajax-detail',
                'platform_flag_label': 'web',
                'product_id': video_id,
            })
        product_data = detail_response['data']

        video_data = product_data.get('current_product')
        if not video_data:
            raise ExtractorError('This video is not available in your region.', expected=True)

        stream_response = self._download_json(
            'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
            video_id, 'Downloading stream info', query={
                'ccs_product_id': video_data['ccs_product_id'],
            })
        stream_data = stream_response['data']['stream']

        stream_sizes = stream_data.get('size', {})
        # Format keys are matched against s<height>p to derive the height;
        # keys that do not match simply get no height.
        formats = [{
            'format_id': vid_format,
            'url': stream_url,
            'height': int_or_none(self._search_regex(
                r's(\d+)p', vid_format, 'height', default=None)),
            'ext': 'mp4',
            'filesize': int_or_none(stream_sizes.get(vid_format))
        } for vid_format, stream_url in stream_data.get('url', {}).items()]
        self._sort_formats(formats)

        subtitles = {}
        for track in video_data.get('subtitle', []):
            track_url = track.get('url')
            if track_url:
                subtitles.setdefault(track.get('name'), []).append({
                    'url': track_url,
                    'ext': 'srt',
                })

        # The episode title is taken from the 'synopsis' field.
        title = video_data['synopsis'].strip()

        info = {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'series': product_data.get('series', {}).get('name'),
            'episode': title,
            'episode_number': int_or_none(video_data.get('number')),
            'duration': int_or_none(stream_data.get('duration')),
            'thumbnail': video_data.get('cover_image_url'),
            'formats': formats,
            'subtitles': subtitles,
        }
        return info

View File

@@ -10,6 +10,7 @@ from ..utils import (
float_or_none,
int_or_none,
remove_start,
urlencode_postdata,
)
from ..compat import compat_urllib_parse_urlencode
@@ -48,17 +49,23 @@ class VLiveIE(InfoExtractor):
webpage = self._download_webpage(
'http://www.vlive.tv/video/%s' % video_id, video_id)
video_params = self._search_regex(
r'\bvlive\.video\.init\(([^)]+)\)',
webpage, 'video params')
status, _, _, live_params, long_video_id, key = re.split(
r'"\s*,\s*"', video_params)[2:8]
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
VIDEO_PARAMS_FIELD = 'video params'
params = self._parse_json(self._search_regex(
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
transform_source=lambda s: '[' + s + ']', fatal=False)
if not params or len(params) < 7:
params = self._search_regex(
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
status, long_video_id, key = params[2], params[5], params[6]
status = remove_start(status, 'PRODUCT_')
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
live_params = self._parse_json('"%s"' % live_params, video_id)
live_params = self._parse_json(live_params, video_id)
return self._live(video_id, webpage, live_params)
return self._live(video_id, webpage)
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
if long_video_id and key:
return self._replay(video_id, webpage, long_video_id, key)
@@ -89,7 +96,22 @@ class VLiveIE(InfoExtractor):
'thumbnail': thumbnail,
}
def _live(self, video_id, webpage, live_params):
def _live(self, video_id, webpage):
init_page = self._download_webpage(
'http://www.vlive.tv/video/init/view',
video_id, note='Downloading live webpage',
data=urlencode_postdata({'videoSeq': video_id}),
headers={
'Referer': 'http://www.vlive.tv/video/%s' % video_id,
'Content-Type': 'application/x-www-form-urlencoded'
})
live_params = self._search_regex(
r'"liveStreamInfo"\s*:\s*(".*"),',
init_page, 'live stream info')
live_params = self._parse_json(live_params, video_id)
live_params = self._parse_json(live_params, video_id)
formats = []
for vid in live_params.get('resolutions', []):
formats.extend(self._extract_m3u8_formats(
@@ -98,10 +120,14 @@ class VLiveIE(InfoExtractor):
fatal=False, live=True))
self._sort_formats(formats)
return dict(self._get_common_fields(webpage),
id=video_id,
formats=formats,
is_live=True)
info = self._get_common_fields(webpage)
info.update({
'title': self._live_title(info['title']),
'id': video_id,
'formats': formats,
'is_live': True,
})
return info
def _replay(self, video_id, webpage, long_video_id, key):
playinfo = self._download_json(
@@ -135,8 +161,11 @@ class VLiveIE(InfoExtractor):
'ext': 'vtt',
'url': caption['source']}]
return dict(self._get_common_fields(webpage),
id=video_id,
formats=formats,
view_count=view_count,
subtitles=subtitles)
info = self._get_common_fields(webpage)
info.update({
'id': video_id,
'formats': formats,
'view_count': view_count,
'subtitles': subtitles,
})
return info

View File

@@ -7,6 +7,7 @@ from ..utils import (
ExtractorError,
parse_duration,
str_to_int,
urljoin,
)
@@ -66,10 +67,9 @@ class VpornIE(InfoExtractor):
description = self._html_search_regex(
r'class="(?:descr|description_txt)">(.*?)</div>',
webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None)
if thumbnail:
thumbnail = 'http://www.vporn.com' + thumbnail
thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
default=None))
uploader = self._html_search_regex(
r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',

View File

@@ -0,0 +1,140 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
str_or_none,
)
class VVVVIDIE(InfoExtractor):
    """Extractor for single episodes served by vvvvid.it."""

    _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/#!(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
    _TESTS = [{
        # video_type == 'video/vvvvid'
        'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
        'md5': 'b8d3cecc2e981adc3835adf07f6df91b',
        'info_dict': {
            'id': '489048',
            'ext': 'mp4',
            'title': 'Ping Pong',
        },
    }, {
        # video_type == 'video/rcs'
        'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
        'md5': '33e0edfba720ad73a8782157fdebc648',
        'info_dict': {
            'id': '482493',
            'ext': 'mp4',
            'title': 'Episodio 01',
        },
    }]
    # 64-character alphabet used by the embed_info obfuscation; a character's
    # position in this string is its 6-bit value.
    _EMBED_INFO_ALPHABET = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij"
    _conn_id = None

    def _real_initialize(self):
        """Fetch the connection id that later API requests send as `conn_id`."""
        self._conn_id = self._download_json(
            'https://www.vvvvid.it/user/login',
            None, headers=self.geo_verification_headers())['data']['conn_id']

    @staticmethod
    def _unpack_sextets(values):
        """Pack 6-bit integers into 8-bit integers, four inputs to three outputs.

        A trailing group of fewer than four values yields correspondingly
        fewer outputs (1 or 2 values -> 1 output, 3 values -> 2 outputs).
        """
        unpacked = []
        for start in range(0, len(values), 4):
            group = values[start:start + 4]
            first = group[0] << 2
            if len(group) > 1:
                first += group[1] >> 4
            unpacked.append(first)
            if len(group) > 2:
                unpacked.append(((group[1] << 4) & 255) + (group[2] >> 2))
            if len(group) > 3:
                unpacked.append(((group[2] << 6) & 255) + group[3])
        return unpacked

    @classmethod
    def _decrypt_embed_info(cls, encoded):
        """Decrypt an `embed_info` value into the plain embed code.

        The algorithm is reverse engineered from function $ds(h) at vvvvid.js.

        Raises ValueError if `encoded` contains a character that is not part
        of the obfuscation alphabet.
        """
        alphabet = cls._EMBED_INFO_ALPHABET
        values = [alphabet.index(char) for char in encoded]
        count = len(values)
        # Two backward passes over the ring of values, XOR-ing every element
        # with its (already updated) successor.
        for step in range(count * 2 - 1, -1, -1):
            values[step % count] ^= values[(step + 1) % count]
        return ''.join(chr(byte) for byte in cls._unpack_sextets(values))

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by `url`.

        Raises ExtractorError when the API reports an error or when the
        season listing does not contain the requested video id.
        """
        show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
        response = self._download_json(
            'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id),
            video_id, headers=self.geo_verification_headers(), query={
                'conn_id': self._conn_id,
            })
        if response['result'] == 'error':
            raise ExtractorError('%s said: %s' % (
                self.IE_NAME, response['message']), expected=True)

        # The API returns every episode of the season; pick the requested one.
        vid = int(video_id)
        video_data = next(
            (episode for episode in response['data']
             if episode.get('video_id') == vid), None)
        if video_data is None:
            raise ExtractorError(
                'Unable to find video %s in season %s' % (video_id, season_id))

        formats = []
        video_type = video_data.get('video_type')
        for quality in ('_sd', ''):
            embed_code = video_data.get('embed_info' + quality)
            if not embed_code:
                continue
            embed_code = self._decrypt_embed_info(embed_code)
            if video_type in ('video/rcs', 'video/kenc'):
                formats.extend(self._extract_akamai_formats(
                    embed_code, video_id))
            else:
                formats.extend(self._extract_wowza_formats(
                    'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data['title'],
            'formats': formats,
            'thumbnail': video_data.get('thumbnail'),
            'duration': int_or_none(video_data.get('length')),
            'series': video_data.get('show_title'),
            'season_id': season_id,
            'season_number': video_data.get('season_number'),
            'episode_id': str_or_none(video_data.get('id')),
            'episode_number': int_or_none(video_data.get('number')),
            # NOTE(review): key kept as in the original; confirm whether the
            # 'episode' field was intended instead of 'episode_title'.
            'episode_title': video_data['title'],
            'view_count': int_or_none(video_data.get('views')),
            'like_count': int_or_none(video_data.get('video_likes')),
        }

View File

@@ -86,6 +86,11 @@ std_headers = {
}
# User-Agent header strings, keyed by a short browser name.
USER_AGENTS = {
'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
# Unique sentinel object; presumably lets callers tell "no default supplied"
# apart from an explicit None -- confirm against its users.
NO_DEFAULT = object()
ENGLISH_MONTH_NAMES = [
@@ -1695,6 +1700,16 @@ def base_url(url):
return re.match(r'https?://[^?#&]+/', url).group()
def urljoin(base, path):
    """Resolve `path` against `base`, or return None when that is not possible.

    Returns None if `path` is not a non-empty text string. A `path` that
    already starts with `http://`, `https://` or `//` is returned unchanged,
    regardless of `base`. Otherwise `base` must be a text string starting
    with one of those prefixes; if it is not, None is returned.
    """
    absolute_prefix = r'^(?:https?:)?//'

    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(absolute_prefix, path) is not None:
        return path

    # The isinstance check must come first so re.match never sees a non-string.
    base_is_usable = (
        isinstance(base, compat_str)
        and re.match(absolute_prefix, base) is not None)
    return compat_urlparse.urljoin(base, path) if base_is_usable else None
class HEADRequest(compat_urllib_request.Request):
def get_method(self):
return 'HEAD'

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.12.09'
__version__ = '2016.12.22'