mirror of
https://source.netsyms.com/Mirrors/youtube-dl
synced 2026-03-27 03:27:58 +00:00
Compare commits
44 Commits
2016.08.10
...
2016.08.13
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
73a85620ee | ||
|
|
a560f28c98 | ||
|
|
5ec5461e1a | ||
|
|
542130a5d9 | ||
|
|
82997dad57 | ||
|
|
647a7bf5e8 | ||
|
|
77afa008dd | ||
|
|
db535435b3 | ||
|
|
c2a453b461 | ||
|
|
cd29eaab95 | ||
|
|
52aa7e7476 | ||
|
|
e97c55ee6a | ||
|
|
acfccacad5 | ||
|
|
5f2c2b7936 | ||
|
|
cb55908e51 | ||
|
|
e581224843 | ||
|
|
f50365e91c | ||
|
|
c366f8d30a | ||
|
|
6a26c5f9d5 | ||
|
|
bd6fb007de | ||
|
|
b69b2ff736 | ||
|
|
794e5dcd7e | ||
|
|
f0d3669437 | ||
|
|
98e698f1ff | ||
|
|
3cddb8d6a7 | ||
|
|
990d533ee4 | ||
|
|
b0081562d2 | ||
|
|
fff37cfd4f | ||
|
|
a3be69b7f0 | ||
|
|
0fd1b1624c | ||
|
|
367976d49f | ||
|
|
0aef0771f8 | ||
|
|
0c070681c5 | ||
|
|
30b25d382d | ||
|
|
e5f878c205 | ||
|
|
e2e84aed7e | ||
|
|
b1927f4e8a | ||
|
|
3b9323d96e | ||
|
|
7f832413d6 | ||
|
|
7f2ed47595 | ||
|
|
c3fa77bdef | ||
|
|
57ce8a6d08 | ||
|
|
69d8eeeec5 | ||
|
|
81c13222c6 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.10**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.13**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.08.10
|
||||
[debug] youtube-dl version 2016.08.13
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
43
ChangeLog
43
ChangeLog
@@ -1,3 +1,46 @@
|
||||
version 2016.08.13
|
||||
|
||||
Core
|
||||
* Show progress for curl external downloader
|
||||
* Forward more options to curl external downloader
|
||||
|
||||
Extractors
|
||||
* [pbs] Fix description extraction
|
||||
* [franceculture] Fix extraction (#10324)
|
||||
* [pornotube] Fix extraction (#10322)
|
||||
* [4tube] Fix metadata extraction (#10321)
|
||||
* [imgur] Fix width and height extraction (#10325)
|
||||
* [expotv] Improve extraction
|
||||
+ [vbox7] Fix extraction (#10309)
|
||||
- [tapely] Remove extractor (#10323)
|
||||
* [muenchentv] Fix extraction (#10313)
|
||||
+ [24video] Add support for .me and .xxx TLDs
|
||||
* [24video] Fix comment count extraction
|
||||
* [sunporno] Add support for embed URLs
|
||||
* [sunporno] Fix metadata extraction (#10316)
|
||||
+ [hgtv] Add extractor for hgtv.ca (#3999)
|
||||
- [pbs] Remove request to unavailable API
|
||||
+ [pbs] Add support for high quality HTTP formats
|
||||
+ [crunchyroll] Add support for HLS formats (#10301)
|
||||
|
||||
version 2016.08.12
|
||||
|
||||
Core
|
||||
* Subtitles are now written as is. Newline conversions are disabled. (#10268)
|
||||
+ Recognize more formats in unified_timestamp
|
||||
|
||||
Extractors
|
||||
- [goldenmoustache] Remove extractor (#10298)
|
||||
* [drtuber] Improve title extraction
|
||||
* [drtuber] Make dislike count optional (#10297)
|
||||
* [chirbit] Fix extraction (#10296)
|
||||
* [francetvinfo] Relax URL regular expression
|
||||
* [rtlnl] Relax URL regular expression (#10282)
|
||||
* [formula1] Relax URL regular expression (#10283)
|
||||
* [wat] Improve extraction (#10281)
|
||||
* [ctsnews] Fix extraction
|
||||
|
||||
|
||||
version 2016.08.10
|
||||
|
||||
Core
|
||||
|
||||
@@ -238,7 +238,6 @@
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-quoi**
|
||||
- **FranceCulture**
|
||||
- **FranceCultureEmission**
|
||||
- **FranceInter**
|
||||
- **francetv**: France 2, 3, 4, 5 and Ô
|
||||
- **francetvinfo.fr**
|
||||
@@ -265,7 +264,6 @@
|
||||
- **GloboArticle**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **GoldenMoustache**
|
||||
- **Golem**
|
||||
- **GoogleDrive**
|
||||
- **Goshgay**
|
||||
@@ -278,6 +276,7 @@
|
||||
- **HellPorno**
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HGTV**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
@@ -665,7 +664,6 @@
|
||||
- **SztvHu**
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tapely**
|
||||
- **Tass**
|
||||
- **TDSLifeway**
|
||||
- **teachertube**: teachertube.com videos
|
||||
|
||||
@@ -968,6 +968,7 @@ The first line
|
||||
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||
self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
|
||||
self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10'])
|
||||
|
||||
def test_cli_valueless_option(self):
|
||||
self.assertEqual(cli_valueless_option(
|
||||
|
||||
@@ -22,10 +22,10 @@ class TestVerboseOutput(unittest.TestCase):
|
||||
'--password', 'secret',
|
||||
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
sout, serr = outp.communicate()
|
||||
self.assertTrue('--username' in serr)
|
||||
self.assertTrue('johnsmith' not in serr)
|
||||
self.assertTrue('--password' in serr)
|
||||
self.assertTrue('secret' not in serr)
|
||||
self.assertTrue(b'--username' in serr)
|
||||
self.assertTrue(b'johnsmith' not in serr)
|
||||
self.assertTrue(b'--password' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
def test_private_info_shortarg(self):
|
||||
outp = subprocess.Popen(
|
||||
@@ -35,10 +35,10 @@ class TestVerboseOutput(unittest.TestCase):
|
||||
'-p', 'secret',
|
||||
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
sout, serr = outp.communicate()
|
||||
self.assertTrue('-u' in serr)
|
||||
self.assertTrue('johnsmith' not in serr)
|
||||
self.assertTrue('-p' in serr)
|
||||
self.assertTrue('secret' not in serr)
|
||||
self.assertTrue(b'-u' in serr)
|
||||
self.assertTrue(b'johnsmith' not in serr)
|
||||
self.assertTrue(b'-p' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
def test_private_info_eq(self):
|
||||
outp = subprocess.Popen(
|
||||
@@ -48,10 +48,10 @@ class TestVerboseOutput(unittest.TestCase):
|
||||
'--password=secret',
|
||||
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
sout, serr = outp.communicate()
|
||||
self.assertTrue('--username' in serr)
|
||||
self.assertTrue('johnsmith' not in serr)
|
||||
self.assertTrue('--password' in serr)
|
||||
self.assertTrue('secret' not in serr)
|
||||
self.assertTrue(b'--username' in serr)
|
||||
self.assertTrue(b'johnsmith' not in serr)
|
||||
self.assertTrue(b'--password' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
def test_private_info_shortarg_eq(self):
|
||||
outp = subprocess.Popen(
|
||||
@@ -61,10 +61,10 @@ class TestVerboseOutput(unittest.TestCase):
|
||||
'-p=secret',
|
||||
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
sout, serr = outp.communicate()
|
||||
self.assertTrue('-u' in serr)
|
||||
self.assertTrue('johnsmith' not in serr)
|
||||
self.assertTrue('-p' in serr)
|
||||
self.assertTrue('secret' not in serr)
|
||||
self.assertTrue(b'-u' in serr)
|
||||
self.assertTrue(b'johnsmith' not in serr)
|
||||
self.assertTrue(b'-p' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -1603,7 +1603,9 @@ class YoutubeDL(object):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||
else:
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
# Use newline='' to prevent conversion of newline characters
|
||||
# See https://github.com/rg3/youtube-dl/issues/10268
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||
subfile.write(sub_data)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
|
||||
@@ -96,6 +96,12 @@ class CurlFD(ExternalFD):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
||||
cmd += self._valueless_option('--silent', 'noprogress')
|
||||
cmd += self._valueless_option('--verbose', 'verbose')
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
cmd += self._option('--retry', 'retries')
|
||||
cmd += self._option('--max-filesize', 'max_filesize')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
|
||||
@@ -103,6 +109,16 @@ class CurlFD(ExternalFD):
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||
|
||||
self._debug_cmd(cmd)
|
||||
|
||||
# curl writes the progress to stderr so don't capture it.
|
||||
p = subprocess.Popen(cmd)
|
||||
p.communicate()
|
||||
return p.returncode
|
||||
|
||||
|
||||
class AxelFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-V'
|
||||
|
||||
@@ -11,15 +11,6 @@ from ..compat import compat_urllib_parse_unquote
|
||||
class BigflixIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
|
||||
'md5': 'dc1b4aebb46e3a7077ecc0d9f43f61e3',
|
||||
'info_dict': {
|
||||
'id': '16537',
|
||||
'ext': 'mp4',
|
||||
'title': 'Singham Returns',
|
||||
'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d',
|
||||
}
|
||||
}, {
|
||||
# 2 formats
|
||||
'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1,30 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class ChirbitIE(InfoExtractor):
|
||||
IE_NAME = 'chirbit'
|
||||
_VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://chirb.it/PrIPv5',
|
||||
'md5': '9847b0dad6ac3e074568bf2cfb197de8',
|
||||
'url': 'http://chirb.it/be2abG',
|
||||
'info_dict': {
|
||||
'id': 'PrIPv5',
|
||||
'id': 'be2abG',
|
||||
'ext': 'mp3',
|
||||
'title': 'Фасадстрой',
|
||||
'duration': 52,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
|
||||
'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
|
||||
'duration': 306,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chirb.it/wp/MN58c2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -33,27 +36,30 @@ class ChirbitIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://chirb.it/%s' % audio_id, audio_id)
|
||||
|
||||
audio_url = self._search_regex(
|
||||
r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
|
||||
data_fd = self._search_regex(
|
||||
r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'data fd', group='url')
|
||||
|
||||
# Reverse engineered from https://chirb.it/js/chirbit.player.js (look
|
||||
# for soundURL)
|
||||
audio_url = base64.b64decode(
|
||||
data_fd[::-1].encode('ascii')).decode('utf-8')
|
||||
|
||||
title = self._search_regex(
|
||||
r'itemprop="name">([^<]+)', webpage, 'title')
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, 'duration', fatal=False))
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'itemprop="playCount"\s*>(\d+)', webpage,
|
||||
'listen count', fatal=False))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'>(\d+) Comments?:', webpage,
|
||||
'comment count', fatal=False))
|
||||
r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
|
||||
description = self._search_regex(
|
||||
r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',
|
||||
webpage, 'description', default=None)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'class=["\']c-length["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -114,6 +114,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
|
||||
'info_dict': {
|
||||
'id': '702409',
|
||||
'ext': 'mp4',
|
||||
'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
|
||||
'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'TV TOKYO',
|
||||
'upload_date': '20160508',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
@@ -336,9 +351,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
video_play_path = xpath_text(stream_info, './file')
|
||||
if not video_url or not video_play_path:
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
@@ -353,7 +377,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'url': direct_video_url,
|
||||
@@ -363,7 +387,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
format_info.update({
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, ExtractorError
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
IE_DESC = '華視新聞'
|
||||
# https connection failed (Connection reset)
|
||||
_VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
|
||||
@@ -16,7 +15,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
'id': '201501291578109',
|
||||
'ext': 'mp4',
|
||||
'title': '以色列.真主黨交火 3人死亡',
|
||||
'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
|
||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
|
||||
'timestamp': 1422528540,
|
||||
'upload_date': '20150129',
|
||||
}
|
||||
@@ -28,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
'id': '201309031304098',
|
||||
'ext': 'mp4',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||
'description': 'md5:f183feeba3752b683827aab71adad584',
|
||||
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1378205880,
|
||||
'upload_date': '20130903',
|
||||
@@ -36,8 +35,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
}, {
|
||||
# With Youtube embedded video
|
||||
'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
|
||||
'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
|
||||
'add_ie': ['Youtube'],
|
||||
'md5': 'e4726b2ccd70ba2c319865e28f0a91d1',
|
||||
'info_dict': {
|
||||
'id': 'OVbfO7d0_hQ',
|
||||
'ext': 'mp4',
|
||||
@@ -47,42 +45,37 @@ class CtsNewsIE(InfoExtractor):
|
||||
'upload_date': '20150128',
|
||||
'uploader_id': 'TBSCTS',
|
||||
'uploader': '中華電視公司',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
page = self._download_webpage(url, news_id)
|
||||
|
||||
if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
|
||||
feed_url = self._html_search_regex(
|
||||
r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
|
||||
page, 'feed url')
|
||||
video_url = self._download_webpage(
|
||||
feed_url, news_id, note='Fetching feed')
|
||||
news_id = self._hidden_inputs(page).get('get_id')
|
||||
|
||||
if news_id:
|
||||
mp4_feed = self._download_json(
|
||||
'http://news.cts.com.tw/action/test_mp4feed.php',
|
||||
news_id, note='Fetching feed', query={'news_id': news_id})
|
||||
video_url = mp4_feed['source_url']
|
||||
else:
|
||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||
youtube_url = self._search_regex(
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
|
||||
default=None)
|
||||
if not youtube_url:
|
||||
raise ExtractorError('The news includes no videos!', expected=True)
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': youtube_url,
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
description = self._html_search_meta('description', page)
|
||||
title = self._html_search_meta('title', page)
|
||||
title = self._html_search_meta('title', page, fatal=True)
|
||||
thumbnail = self._html_search_meta('image', page)
|
||||
|
||||
datetime_str = self._html_search_regex(
|
||||
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
|
||||
# Transform into ISO 8601 format with timezone info
|
||||
datetime_str = datetime_str.replace('/', '-') + ':00+0800'
|
||||
timestamp = parse_iso8601(datetime_str, delimiter=' ')
|
||||
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False)
|
||||
timestamp = None
|
||||
if datetime_str:
|
||||
timestamp = unified_timestamp(datetime_str) - 8 * 3600
|
||||
|
||||
return {
|
||||
'id': news_id,
|
||||
|
||||
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class DrTuberIE(InfoExtractor):
|
||||
@@ -17,7 +20,6 @@ class DrTuberIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'hot perky blonde naked golf',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
@@ -36,25 +38,29 @@ class DrTuberIE(InfoExtractor):
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'],
|
||||
(r'class="title_watch"[^>]*><p>([^<]+)<',
|
||||
r'<p[^>]+class="title_substrate">([^<]+)</p>',
|
||||
r'<title>([^<]+) - \d+'),
|
||||
webpage, 'title')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'poster="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
def extract_count(id_, name):
|
||||
def extract_count(id_, name, default=NO_DEFAULT):
|
||||
return str_to_int(self._html_search_regex(
|
||||
r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
|
||||
webpage, '%s count' % name, fatal=False))
|
||||
webpage, '%s count' % name, default=default, fatal=False))
|
||||
|
||||
like_count = extract_count('rate_likes', 'like')
|
||||
dislike_count = extract_count('rate_dislikes', 'dislike')
|
||||
dislike_count = extract_count('rate_dislikes', 'dislike', default=None)
|
||||
comment_count = extract_count('comments_count', 'comment')
|
||||
|
||||
cats_str = self._search_regex(
|
||||
r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
||||
r'<div[^>]+class="categories_list">(.+?)</div>',
|
||||
webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(
|
||||
r'<a title="([^"]+)"', cats_str)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -12,23 +10,22 @@ from ..utils import (
|
||||
class ExpoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
|
||||
'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
|
||||
'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
|
||||
'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
|
||||
'info_dict': {
|
||||
'id': '17561',
|
||||
'id': '667916',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20060212',
|
||||
'title': 'My Favorite Online Scrapbook Store',
|
||||
'view_count': int,
|
||||
'description': 'You\'ll find most everything you need at this virtual store front.',
|
||||
'uploader': 'Anna T.',
|
||||
'title': 'NYX Butter Lipstick Little Susie',
|
||||
'description': 'Goes on like butter, but looks better!',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Stephanie S.',
|
||||
'upload_date': '20150520',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_key = self._search_regex(
|
||||
@@ -66,7 +63,7 @@ class ExpoTVIE(InfoExtractor):
|
||||
fatal=False)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
|
||||
fatal=False))
|
||||
fatal=False), day_first=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -272,10 +272,7 @@ from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import (
|
||||
FranceCultureIE,
|
||||
FranceCultureEmissionIE,
|
||||
)
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
PluzzIE,
|
||||
@@ -311,7 +308,6 @@ from .globo import (
|
||||
)
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .googleplus import GooglePlusIE
|
||||
@@ -326,6 +322,7 @@ from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
@@ -812,7 +809,6 @@ from .tagesschau import (
|
||||
TagesschauPlayerIE,
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachertube import (
|
||||
|
||||
@@ -5,8 +5,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class Formula1IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||
'info_dict': {
|
||||
@@ -15,7 +15,10 @@ class Formula1IE(InfoExtractor):
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
@@ -43,14 +43,14 @@ class FourTubeIE(InfoExtractor):
|
||||
'uploadDate', webpage))
|
||||
thumbnail = self._html_search_meta('thumbnailUrl', webpage)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
|
||||
r'<a class="item-to-subscribe" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
|
||||
r'<a class="item-to-subscribe" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
categories_html = self._search_regex(
|
||||
r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
|
||||
r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="[^"]*?list[^"]*?">(.*?)</ul>',
|
||||
webpage, 'categories', fatal=False)
|
||||
categories = None
|
||||
if categories_html:
|
||||
@@ -59,10 +59,10 @@ class FourTubeIE(InfoExtractor):
|
||||
r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserPlays:([0-9,]+)">',
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
|
||||
webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserLikes:([0-9,]+)">',
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
|
||||
webpage, 'like count', fatal=False))
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
|
||||
|
||||
@@ -2,104 +2,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class FranceCultureIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
|
||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
||||
'info_dict': {
|
||||
'id': '4795174',
|
||||
'id': 'rendez-vous-au-pays-des-geeks',
|
||||
'display_id': 'rendez-vous-au-pays-des-geeks',
|
||||
'ext': 'mp3',
|
||||
'title': 'Rendez-vous au pays des geeks',
|
||||
'alt_title': 'Carnet nomade | 13-14',
|
||||
'vcodec': 'none',
|
||||
'thumbnail': 're:^https?://.*\\.jpg$',
|
||||
'upload_date': '20140301',
|
||||
'thumbnail': r're:^http://static\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
|
||||
'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche',
|
||||
'timestamp': 1393700400,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}
|
||||
|
||||
def _extract_from_player(self, url, video_id):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
video_path = self._search_regex(
|
||||
r'<a id="player".*?href="([^"]+)"', webpage, 'video path')
|
||||
video_url = compat_urlparse.urljoin(url, video_path)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<a id="player".*?data-date="([0-9]+)"',
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<a[^>]+href="([^"]+)"',
|
||||
webpage, 'video path')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
|
||||
webpage, 'upload date', fatal=False))
|
||||
thumbnail = self._search_regex(
|
||||
r'<a id="player".*?>\s+<img src="([^"]+)"',
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
display_id = self._search_regex(
|
||||
r'<span class="path-diffusion">emission-(.*?)</span>', webpage, 'display_id')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<span class="title-diffusion">(.*?)</span>', webpage, 'title')
|
||||
alt_title = self._html_search_regex(
|
||||
r'<span class="title">(.*?)</span>',
|
||||
webpage, 'alt_title', fatal=False)
|
||||
description = self._html_search_regex(
|
||||
r'<span class="description">(.*?)</span>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
||||
webpage, 'uploader', default=None)
|
||||
vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'vcodec': vcodec,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'title': title,
|
||||
'alt_title': alt_title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'display_id': display_id,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_player(url, video_id)
|
||||
|
||||
|
||||
class FranceCultureEmissionIE(FranceCultureIE):
    """Resolve franceculture.fr ``emission-*`` pages to their player link.

    The emission page is only used to locate the player URL; the result
    itself is produced by ``_extract_from_player``.
    """

    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emission-(?P<id>[^?#]+)'
    _TEST = {
        'url': 'http://www.franceculture.fr/emission-les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
        'info_dict': {
            'title': 'Jean-Gabriel Périot, cinéaste',
            'alt_title': 'Les Carnets de la création',
            'id': '5093239',
            'display_id': 'les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
            'ext': 'mp3',
            'timestamp': 1444762500,
            'upload_date': '20151013',
            'description': 'startswith:Aujourd\'hui dans "Les carnets de la création", le cinéaste',
        },
    }

    def _real_extract(self, url):
        """Locate the player link on the emission page and extract from it.

        Raises an expected ExtractorError when the search yields the
        'no_path_player' marker instead of a player path.
        """
        emission_id = self._match_id(url)
        page = self._download_webpage(url, emission_id)

        # 'no_path_player' is passed positionally as the fourth argument
        # (presumably the fallback/default value -- confirm against
        # _html_search_regex) and is then used as a "nothing found" marker.
        player_path = self._html_search_regex(
            r'<a class="rf-player-open".*?href="([^"]+)"',
            page, 'video path', 'no_path_player')
        if player_path == 'no_path_player':
            raise ExtractorError('no player : no sound in this page.', expected=True)

        # The id handed to the player extraction is the run of digits that
        # follows "play=" in the player path.
        player_id = self._search_regex(
            'play=(?P<id>[0-9]+)', player_path, 'new_id', group='id')
        return self._extract_from_player(
            compat_urlparse.urljoin(url, player_path), player_id)
|
||||
|
||||
@@ -131,7 +131,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
@@ -206,6 +206,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'uploader_id': 'x2q2ez',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}, {
|
||||
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class GoldenMoustacheIE(InfoExtractor):
    """Extractor for goldenmoustache.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/',
        'md5': '0f904432fa07da5054d6c8beb5efb51a',
        'info_dict': {
            'id': '3700',
            'ext': 'mp4',
            'title': 'Suricate - Le Poker',
            'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
            # Raw string: "\." is a regex escape, not a valid Python string
            # escape, so a non-raw literal triggers an invalid-escape warning.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/',
        'md5': '27f0c50fb4dd5f01dc9082fc67cd5700',
        'info_dict': {
            'id': '55249',
            'ext': 'mp4',
            'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)',
            'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a',
            'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
        }
    }]

    def _real_extract(self, url):
        """Download the page and build the info dict for its mp4 source.

        The media URL is read from the ``data-src`` attribute that follows
        ``data-src-type="mp4"``; the title comes from ``<title>`` with the
        optional " - Golden Moustache" suffix left out, and thumbnail and
        description come from the Open Graph helpers.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL')
        title = self._html_search_regex(
            r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
|
||||
48
youtube_dl/extractor/hgtv.py
Normal file
48
youtube_dl/extractor/hgtv.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class HGTVIE(InfoExtractor):
    """Extractor for hgtv.ca video pages; the media is handed to ThePlatform."""

    # The dot in "video.html" is escaped so that it matches only a literal
    # dot rather than any character.
    _VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video\.html'
    _TEST = {
        'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
        'md5': '',
        'info_dict': {
            'id': 'aFH__I_5FBOX',
            'ext': 'mp4',
            'title': 'Overnight Success',
            'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
            'uploader': 'SHWM-NEW',
            'timestamp': 1470320034,
            'upload_date': '20160804',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at ThePlatform.

        The page's ``embed_vars`` JavaScript object is parsed (through
        ``js_to_json``); its mandatory ``pid`` entry is inserted into the
        link.theplatform.com URL, while ``show``, ``season`` and ``episode``
        are optional and become series metadata.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        embed_vars = self._parse_json(self._search_regex(
            r'(?s)embed_vars\s*=\s*({.*?});',
            webpage, 'embed vars'), display_id, js_to_json)
        return {
            '_type': 'url_transparent',
            'url': smuggle_url(
                'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
                    'force_smil_url': True
                }),
            'series': embed_vars.get('show'),
            'season_number': int_or_none(embed_vars.get('season')),
            'episode_number': int_or_none(embed_vars.get('episode')),
            'ie_key': 'ThePlatform',
        }
|
||||
@@ -50,12 +50,10 @@ class ImgurIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, video_id), video_id)
|
||||
|
||||
width = int_or_none(self._search_regex(
|
||||
r'<param name="width" value="([0-9]+)"',
|
||||
webpage, 'width', fatal=False))
|
||||
height = int_or_none(self._search_regex(
|
||||
r'<param name="height" value="([0-9]+)"',
|
||||
webpage, 'height', fatal=False))
|
||||
width = int_or_none(self._og_search_property(
|
||||
'video:width', webpage, default=None))
|
||||
height = int_or_none(self._og_search_property(
|
||||
'video:height', webpage, default=None))
|
||||
|
||||
video_elements = self._search_regex(
|
||||
r'(?s)<div class="video-elements">(.*?)</div>',
|
||||
|
||||
@@ -36,7 +36,7 @@ class MuenchenTVIE(InfoExtractor):
|
||||
title = self._live_title(self._og_search_title(webpage))
|
||||
|
||||
data_js = self._search_regex(
|
||||
r'(?s)\nplaylist:\s*(\[.*?}\]),related:',
|
||||
r'(?s)\nplaylist:\s*(\[.*?}\]),',
|
||||
webpage, 'playlist configuration')
|
||||
data_json = js_to_json(data_js)
|
||||
data = json.loads(data_json)[0]
|
||||
|
||||
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
)
|
||||
@@ -201,7 +201,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
||||
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
|
||||
'description': 'md5:31b664af3c65fd07fa460d306b837d00',
|
||||
'duration': 3190,
|
||||
},
|
||||
},
|
||||
@@ -212,7 +212,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - Losing Iraq',
|
||||
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
|
||||
'description': 'md5:5979a4d069b157f622d02bff62fbe654',
|
||||
'duration': 5050,
|
||||
},
|
||||
},
|
||||
@@ -223,7 +223,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2201174722',
|
||||
'ext': 'mp4',
|
||||
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'description': 'md5:95a19f568689d09a166dff9edada3301',
|
||||
'description': 'md5:86ab9a3d04458b876147b355788b8781',
|
||||
'duration': 801,
|
||||
},
|
||||
},
|
||||
@@ -268,7 +268,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'player',
|
||||
'ext': 'mp4',
|
||||
'title': 'American Experience - Death and the Civil War, Chapter 1',
|
||||
'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',
|
||||
'description': 'md5:67fa89a9402e2ee7d08f53b920674c18',
|
||||
'duration': 682,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -294,13 +294,13 @@ class PBSIE(InfoExtractor):
|
||||
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
|
||||
# https://github.com/rg3/youtube-dl/issues/7059)
|
||||
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
|
||||
'md5': '84ced42850d78f1d4650297356e95e6f',
|
||||
'md5': '59b0ef5009f9ac8a319cc5efebcd865e',
|
||||
'info_dict': {
|
||||
'id': '2365546844',
|
||||
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
||||
'ext': 'mp4',
|
||||
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
|
||||
'description': 'md5:54033c6baa1f9623607c6e2ed245888b',
|
||||
'description': 'md5:c0ff7475a4b70261c7e58f493c2792a5',
|
||||
'duration': 1480,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -313,7 +313,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'the-atomic-artists',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - The Atomic Artists',
|
||||
'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',
|
||||
'description': 'md5:f677e4520cfacb4a5ce1471e31b57800',
|
||||
'duration': 723,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -324,7 +324,7 @@ class PBSIE(InfoExtractor):
|
||||
{
|
||||
# Serves hd only via widget/partnerplayer page
|
||||
'url': 'http://www.pbs.org/video/2365641075/',
|
||||
'md5': 'acfd4c400b48149a44861cb16dd305cf',
|
||||
'md5': 'fdf907851eab57211dd589cf12006666',
|
||||
'info_dict': {
|
||||
'id': '2365641075',
|
||||
'ext': 'mp4',
|
||||
@@ -353,11 +353,16 @@ class PBSIE(InfoExtractor):
|
||||
def _extract_webpage(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
description = None
|
||||
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
display_id = presumptive_id
|
||||
if presumptive_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
description = strip_or_none(self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage, default=None))
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
|
||||
webpage, 'upload date', default=None))
|
||||
@@ -370,7 +375,7 @@ class PBSIE(InfoExtractor):
|
||||
for p in MULTI_PART_REGEXES:
|
||||
tabbed_videos = re.findall(p, webpage)
|
||||
if tabbed_videos:
|
||||
return tabbed_videos, presumptive_id, upload_date
|
||||
return tabbed_videos, presumptive_id, upload_date, description
|
||||
|
||||
MEDIA_ID_REGEXES = [
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
@@ -382,7 +387,7 @@ class PBSIE(InfoExtractor):
|
||||
media_id = self._search_regex(
|
||||
MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
|
||||
if media_id:
|
||||
return media_id, presumptive_id, upload_date
|
||||
return media_id, presumptive_id, upload_date, description
|
||||
|
||||
# Frontline video embedded via flp
|
||||
video_id = self._search_regex(
|
||||
@@ -399,7 +404,7 @@ class PBSIE(InfoExtractor):
|
||||
'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id,
|
||||
presumptive_id, 'Downloading getdir JSON',
|
||||
transform_source=strip_jsonp)
|
||||
return getdir['mid'], presumptive_id, upload_date
|
||||
return getdir['mid'], presumptive_id, upload_date, description
|
||||
|
||||
for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage):
|
||||
url = self._search_regex(
|
||||
@@ -423,10 +428,10 @@ class PBSIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
return video_id, display_id, None
|
||||
return video_id, display_id, None, description
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id, upload_date = self._extract_webpage(url)
|
||||
video_id, display_id, upload_date, description = self._extract_webpage(url)
|
||||
|
||||
if isinstance(video_id, list):
|
||||
entries = [self.url_result(
|
||||
@@ -448,17 +453,6 @@ class PBSIE(InfoExtractor):
|
||||
redirects.append(redirect)
|
||||
redirect_urls.add(redirect_url)
|
||||
|
||||
try:
|
||||
video_info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id, 'Downloading video info JSON')
|
||||
extract_redirect_urls(video_info)
|
||||
info = video_info
|
||||
except ExtractorError as e:
|
||||
# videoInfo API may not work for some videos
|
||||
if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
|
||||
raise
|
||||
|
||||
# Player pages may also serve different qualities
|
||||
for page in ('widget/partnerplayer', 'portalplayer'):
|
||||
player = self._download_webpage(
|
||||
@@ -511,15 +505,19 @@ class PBSIE(InfoExtractor):
|
||||
formats))
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
||||
# extract only the formats that we know that they will be available as http format.
|
||||
# https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
|
||||
if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
|
||||
bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None)
|
||||
# Lower qualities (150k and 192k) are not available as HTTP formats (see [1]),
|
||||
# we won't try extracting them.
|
||||
# Since summer 2016 higher quality formats (4500k and 6500k) are also available
|
||||
# albeit they are not documented in [2].
|
||||
# 1. https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
|
||||
# 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
|
||||
if not bitrate or int(bitrate) < 400:
|
||||
continue
|
||||
f_url = re.sub(r'\d+k|baseline', bitrate, http_url)
|
||||
f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url)
|
||||
# This may produce invalid links sometimes (e.g.
|
||||
# http://www.pbs.org/wgbh/frontline/film/suicide-plan)
|
||||
if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate):
|
||||
if not self._is_valid_url(f_url, display_id, 'http-%sk video' % bitrate):
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
@@ -562,11 +560,14 @@ class PBSIE(InfoExtractor):
|
||||
if alt_title:
|
||||
info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
|
||||
|
||||
description = info.get('description') or info.get(
|
||||
'program', {}).get('description') or description
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'description': info.get('description') or info.get('program', {}).get('description'),
|
||||
'description': description,
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'age_limit': age_limit,
|
||||
|
||||
@@ -3,10 +3,7 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PornotubeIE(InfoExtractor):
|
||||
@@ -31,59 +28,55 @@ class PornotubeIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Fetch origin token
|
||||
js_config = self._download_webpage(
|
||||
'http://www.pornotube.com/assets/src/app/config.js', video_id,
|
||||
note='Download JS config')
|
||||
originAuthenticationSpaceKey = self._search_regex(
|
||||
r"constant\('originAuthenticationSpaceKey',\s*'([^']+)'",
|
||||
js_config, 'originAuthenticationSpaceKey')
|
||||
token = self._download_json(
|
||||
'https://api.aebn.net/auth/v2/origins/authenticate',
|
||||
video_id, note='Downloading token',
|
||||
data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Origin': 'http://www.pornotube.com',
|
||||
})['tokenKey']
|
||||
|
||||
# Fetch actual token
|
||||
token_req_data = {
|
||||
'authenticationSpaceKey': originAuthenticationSpaceKey,
|
||||
'credentials': 'Clip Application',
|
||||
}
|
||||
token_req = sanitized_Request(
|
||||
'https://api.aebn.net/auth/v1/token/primal',
|
||||
data=json.dumps(token_req_data).encode('utf-8'))
|
||||
token_req.add_header('Content-Type', 'application/json')
|
||||
token_req.add_header('Origin', 'http://www.pornotube.com')
|
||||
token_answer = self._download_json(
|
||||
token_req, video_id, note='Requesting primal token')
|
||||
token = token_answer['tokenKey']
|
||||
video_url = self._download_json(
|
||||
'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id,
|
||||
video_id, note='Downloading delivery information',
|
||||
headers={'Authorization': token})['mediaUrl']
|
||||
|
||||
# Get video URL
|
||||
delivery_req = sanitized_Request(
|
||||
'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id)
|
||||
delivery_req.add_header('Authorization', token)
|
||||
delivery_info = self._download_json(
|
||||
delivery_req, video_id, note='Downloading delivery information')
|
||||
video_url = delivery_info['mediaUrl']
|
||||
FIELDS = (
|
||||
'title', 'description', 'startSecond', 'endSecond', 'publishDate',
|
||||
'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber'
|
||||
)
|
||||
|
||||
# Get additional info (title etc.)
|
||||
info_req = sanitized_Request(
|
||||
'https://api.aebn.net/content/v1/clips/%s?expand='
|
||||
'title,description,primaryImageNumber,startSecond,endSecond,'
|
||||
'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,'
|
||||
'movie.studios,stars.name,studios.name,categories.name,'
|
||||
'clipActive,movieActive,publishDate,orientations' % video_id)
|
||||
info_req.add_header('Authorization', token)
|
||||
info = self._download_json(
|
||||
info_req, video_id, note='Downloading metadata')
|
||||
'https://api.aebn.net/content/v2/clips/%s?fields=%s'
|
||||
% (video_id, ','.join(FIELDS)), video_id,
|
||||
note='Downloading metadata',
|
||||
headers={'Authorization': token})
|
||||
|
||||
if isinstance(info, list):
|
||||
info = info[0]
|
||||
|
||||
title = info['title']
|
||||
|
||||
timestamp = int_or_none(info.get('publishDate'), scale=1000)
|
||||
uploader = info.get('studios', [{}])[0].get('name')
|
||||
movie_id = info['movie']['movieId']
|
||||
thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
|
||||
movie_id, movie_id, info['primaryImageNumber'])
|
||||
categories = [c['name'] for c in info.get('categories')]
|
||||
movie_id = info.get('movieId')
|
||||
primary_image_number = info.get('primaryImageNumber')
|
||||
thumbnail = None
|
||||
if movie_id and primary_image_number:
|
||||
thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
|
||||
movie_id, movie_id, primary_image_number)
|
||||
start = int_or_none(info.get('startSecond'))
|
||||
end = int_or_none(info.get('endSecond'))
|
||||
duration = end - start if start and end else None
|
||||
categories = [c['name'] for c in info.get('categories', []) if c.get('name')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': info['title'],
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'thumbnail': thumbnail,
|
||||
|
||||
@@ -14,7 +14,7 @@ class RtlNlIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?
|
||||
(?:
|
||||
rtlxl\.nl/\#!/[^/]+/|
|
||||
rtlxl\.nl/[^\#]*\#!/[^/]+/|
|
||||
rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
|
||||
)
|
||||
(?P<id>[0-9a-f-]+)'''
|
||||
@@ -67,6 +67,9 @@ class RtlNlIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -12,25 +12,29 @@ from ..utils import (
|
||||
|
||||
|
||||
class SunPornoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.sunporno.com/videos/807778/',
|
||||
'md5': '6457d3c165fd6de062b99ef6c2ff4c86',
|
||||
'md5': '507887e29033502f29dba69affeebfc9',
|
||||
'info_dict': {
|
||||
'id': '807778',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:0a400058e8105d39e35c35e7c5184164',
|
||||
'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 302,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embeds.sunporno.com/embed/807778',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'http://www.sunporno.com/videos/%s' % video_id, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title')
|
||||
@@ -40,7 +44,8 @@ class SunPornoIE(InfoExtractor):
|
||||
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'itemprop="duration">\s*(\d+:\d+)\s*<',
|
||||
(r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<',
|
||||
r'>Duration:\s*<span[^>]+>\s*(\d+:\d+)\s*<'),
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
@@ -48,7 +53,7 @@ class SunPornoIE(InfoExtractor):
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'(\d+)</b> Comments?',
|
||||
webpage, 'comment count', fatal=False))
|
||||
webpage, 'comment count', fatal=False, default=None))
|
||||
|
||||
formats = []
|
||||
quality = qualities(['mp4', 'flv'])
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class TapelyIE(InfoExtractor):
    """Extractor for tape.ly / tapely.com tapes and single songs of a tape.

    A URL without a trailing number yields the whole tape as a playlist; a
    URL ending in ``/<n>`` yields only the n-th extracted entry (1-based).
    """

    _VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
    _API_URL = 'http://tape.ly/showtape?id={0:}'
    _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
    _SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}'
    _TESTS = [
        {
            'url': 'http://tape.ly/my-grief-as-told-by-water',
            'info_dict': {
                'id': 23952,
                'title': 'my grief as told by water',
                # Raw string: "\." is a regex escape, not a valid Python
                # string escape.
                'thumbnail': r're:^https?://.*\.png$',
                'uploader_id': 16484,
                'timestamp': 1411848286,
                'description': 'For Robin and Ponkers, whom the tides of life have taken out to sea.',
            },
            'playlist_count': 13,
        },
        {
            'url': 'http://tape.ly/my-grief-as-told-by-water/1',
            'md5': '79031f459fdec6530663b854cbc5715c',
            'info_dict': {
                'id': 258464,
                'title': 'Dreaming Awake (My Brightest Diamond)',
                'ext': 'm4a',
            },
        },
        {
            'url': 'https://tapely.com/my-grief-as-told-by-water',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Fetch the tape JSON and return a playlist or a single entry.

        Songs whose ``source`` is 'S3' get a direct URL, 'YT' and 'SC' songs
        are delegated to the Youtube and Soundcloud extractors, and any other
        source is reported with a warning and left out.

        Raises an expected ExtractorError when a song number is given in the
        URL but no extracted entry exists at that position.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        playlist_url = self._API_URL.format(display_id)
        request = sanitized_Request(playlist_url)
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        request.add_header('Accept', 'application/json')
        request.add_header('Referer', url)

        playlist = self._download_json(request, display_id)

        tape = playlist['tape']

        entries = []
        for s in tape['songs']:
            song = s['song']
            entry = {
                'id': song['id'],
                'duration': float_or_none(song.get('songduration'), 1000),
                'title': song['title'],
            }
            if song['source'] == 'S3':
                entry.update({
                    'url': self._S3_SONG_URL.format(song['filename']),
                })
                entries.append(entry)
            elif song['source'] == 'YT':
                self.to_screen('YouTube video detected')
                yt_id = song['filename'].replace('/youtube/', '')
                entry.update(self.url_result(yt_id, 'Youtube', video_id=yt_id))
                entries.append(entry)
            elif song['source'] == 'SC':
                self.to_screen('SoundCloud song detected')
                sc_url = self._SOUNDCLOUD_SONG_URL.format(song['filename'])
                entry.update(self.url_result(sc_url, 'Soundcloud'))
                entries.append(entry)
            else:
                self.report_warning('Unknown song source: %s' % song['source'])

        song_number = mobj.group('songnr')
        if song_number:
            # Song numbers in the URL are 1-based. The range is checked
            # explicitly because a bare entries[index] lookup would turn
            # "/0" into index -1 and silently return the last entry.
            index = int(song_number) - 1
            if not 0 <= index < len(entries):
                raise ExtractorError(
                    'No song with index: %s' % song_number,
                    expected=True)
            return entries[index]

        return {
            '_type': 'playlist',
            'id': tape['id'],
            'display_id': display_id,
            'title': tape['name'],
            'entries': entries,
            'thumbnail': tape.get('image_url'),
            'description': clean_html(tape.get('subtext')),
            'like_count': tape.get('likescount'),
            'uploader_id': tape.get('user_id'),
            'timestamp': parse_iso8601(tape.get('published_at')),
        }
|
||||
@@ -12,32 +12,32 @@ from ..utils import (
|
||||
|
||||
class TwentyFourVideoIE(InfoExtractor):
|
||||
IE_NAME = '24video'
|
||||
_VALID_URL = r'https?://(?:www\.)?24video\.net/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.24video.net/video/view/1044982',
|
||||
'md5': 'e09fc0901d9eaeedac872f154931deeb',
|
||||
'info_dict': {
|
||||
'id': '1044982',
|
||||
'ext': 'mp4',
|
||||
'title': 'Эротика каменного века',
|
||||
'description': 'Как смотрели порно в каменном веке.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'SUPERTELO',
|
||||
'duration': 31,
|
||||
'timestamp': 1275937857,
|
||||
'upload_date': '20100607',
|
||||
'age_limit': 18,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
_TESTS = [{
|
||||
'url': 'http://www.24video.net/video/view/1044982',
|
||||
'md5': 'e09fc0901d9eaeedac872f154931deeb',
|
||||
'info_dict': {
|
||||
'id': '1044982',
|
||||
'ext': 'mp4',
|
||||
'title': 'Эротика каменного века',
|
||||
'description': 'Как смотрели порно в каменном веке.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'SUPERTELO',
|
||||
'duration': 31,
|
||||
'timestamp': 1275937857,
|
||||
'upload_date': '20100607',
|
||||
'age_limit': 18,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.24video.me/video/view/1044982',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -64,7 +64,7 @@ class TwentyFourVideoIE(InfoExtractor):
|
||||
r'<span class="video-views">(\d+) просмотр',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'<div class="comments-title" id="comments-count">(\d+) комментари',
|
||||
r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
# Sets some cookies
|
||||
|
||||
@@ -2,17 +2,20 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils import urlencode_postdata
|
||||
|
||||
|
||||
class Vbox7IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://vbox7.com/play:0946fff23c',
|
||||
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
|
||||
'info_dict': {
|
||||
'id': '0946fff23c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Борисов: Притеснен съм за бъдещето на България',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vbox7.com/play:249bb972c2',
|
||||
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
||||
'info_dict': {
|
||||
@@ -20,43 +23,38 @@ class Vbox7IE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||
},
|
||||
}
|
||||
'skip': 'georestricted',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# need to get the page 3 times for the correct jsSecretToken cookie
|
||||
# which is necessary for the correct title
|
||||
def get_session_id():
|
||||
redirect_page = self._download_webpage(url, video_id)
|
||||
session_id_url = self._search_regex(
|
||||
r'var\s*url\s*=\s*\'([^\']+)\';', redirect_page,
|
||||
'session id url')
|
||||
self._download_webpage(
|
||||
compat_urlparse.urljoin(url, session_id_url), video_id,
|
||||
'Getting session id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
get_session_id()
|
||||
get_session_id()
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*)</title>', webpage, 'title').split('/')[0].strip()
|
||||
|
||||
webpage = self._download_webpage(url, video_id,
|
||||
'Downloading redirect page')
|
||||
video_url = self._search_regex(
|
||||
r'src\s*:\s*(["\'])(?P<url>.+?.mp4.*?)\1',
|
||||
webpage, 'video url', default=None, group='url')
|
||||
|
||||
title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
webpage, 'title').split('/')[0].strip()
|
||||
thumbnail_url = self._og_search_thumbnail(webpage)
|
||||
|
||||
info_url = 'http://vbox7.com/play/magare.do'
|
||||
data = urlencode_postdata({'as3': '1', 'vid': video_id})
|
||||
info_request = sanitized_Request(info_url, data)
|
||||
info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
|
||||
if info_response is None:
|
||||
raise ExtractorError('Unable to extract the media url')
|
||||
(final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
|
||||
if not video_url:
|
||||
info_response = self._download_webpage(
|
||||
'http://vbox7.com/play/magare.do', video_id,
|
||||
'Downloading info webpage',
|
||||
data=urlencode_postdata({'as3': '1', 'vid': video_id}),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
final_url, thumbnail_url = map(
|
||||
lambda x: x.split('=')[1], info_response.split('&'))
|
||||
|
||||
if '/na.mp4' in video_url:
|
||||
self.raise_geo_restricted()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'url': self._proto_relative_url(video_url, 'http:'),
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,48 +31,58 @@ class WatIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
|
||||
'md5': 'fbc84e4378165278e743956d9c1bf16b',
|
||||
'md5': '34bdfa5ca9fd3c7eb88601b635b0424c',
|
||||
'info_dict': {
|
||||
'id': '11713075',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
|
||||
'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
|
||||
'upload_date': '20140816',
|
||||
'duration': 2910,
|
||||
},
|
||||
'skip': "Ce contenu n'est pas disponible pour l'instant.",
|
||||
'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."],
|
||||
},
|
||||
]
|
||||
|
||||
_FORMATS = (
|
||||
(200, 416, 234),
|
||||
(400, 480, 270),
|
||||
(600, 640, 360),
|
||||
(1200, 640, 360),
|
||||
(1800, 960, 540),
|
||||
(2500, 1280, 720),
|
||||
)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
|
||||
|
||||
# 'contentv4' is used in the website, but it also returns the related
|
||||
# videos, we don't need them
|
||||
video_info = self._download_json(
|
||||
'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']
|
||||
video_data = self._download_json(
|
||||
'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
|
||||
video_info = video_data['media']
|
||||
|
||||
error_desc = video_info.get('error_desc')
|
||||
if error_desc:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
|
||||
self.report_warning(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_desc))
|
||||
|
||||
chapters = video_info['chapters']
|
||||
first_chapter = chapters[0]
|
||||
if chapters:
|
||||
first_chapter = chapters[0]
|
||||
|
||||
def video_id_for_chapter(chapter):
|
||||
return chapter['tc_start'].split('-')[0]
|
||||
def video_id_for_chapter(chapter):
|
||||
return chapter['tc_start'].split('-')[0]
|
||||
|
||||
if video_id_for_chapter(first_chapter) != video_id:
|
||||
self.to_screen('Multipart video detected')
|
||||
entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
|
||||
return self.playlist_result(entries, video_id, video_info['title'])
|
||||
# Otherwise we can continue and extract just one part, we have to use
|
||||
# the video id for getting the video url
|
||||
if video_id_for_chapter(first_chapter) != video_id:
|
||||
self.to_screen('Multipart video detected')
|
||||
entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
|
||||
return self.playlist_result(entries, video_id, video_info['title'])
|
||||
# Otherwise we can continue and extract just one part, we have to use
|
||||
# the video id for getting the video url
|
||||
else:
|
||||
first_chapter = video_info
|
||||
|
||||
date_diffusion = first_chapter.get('date_diffusion')
|
||||
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
|
||||
title = first_chapter['title']
|
||||
|
||||
def extract_url(path_template, url_type):
|
||||
req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
|
||||
@@ -83,36 +94,61 @@ class WatIE(InfoExtractor):
|
||||
expected=True)
|
||||
return red_url
|
||||
|
||||
m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
|
||||
http_url = extract_url('android5/%s.mp4', 'http')
|
||||
|
||||
formats = []
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
formats.extend(m3u8_formats)
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
for m3u8_format in m3u8_formats:
|
||||
vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
|
||||
if not vbr or not abr:
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
try:
|
||||
http_url = extract_url('android5/%s.mp4', 'http')
|
||||
m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
formats.extend(m3u8_formats)
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
for m3u8_format in m3u8_formats:
|
||||
vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
|
||||
if not vbr or not abr:
|
||||
continue
|
||||
format_id = m3u8_format['format_id'].replace('hls', 'http')
|
||||
fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url)
|
||||
if self._is_valid_url(fmt_url, video_id, format_id):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': fmt_url,
|
||||
'format_id': format_id,
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
except ExtractorError:
|
||||
abr = 64
|
||||
for vbr, width, height in self._FORMATS:
|
||||
tbr = vbr + abr
|
||||
format_id = 'http-%s' % tbr
|
||||
fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
|
||||
if self._is_valid_url(fmt_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': fmt_url,
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
|
||||
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
|
||||
duration = None
|
||||
files = video_info['files']
|
||||
if files:
|
||||
duration = int_or_none(files[0].get('duration'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': first_chapter['title'],
|
||||
'thumbnail': first_chapter['preview'],
|
||||
'description': first_chapter['description'],
|
||||
'view_count': video_info['views'],
|
||||
'title': title,
|
||||
'thumbnail': first_chapter.get('preview'),
|
||||
'description': first_chapter.get('description'),
|
||||
'view_count': int_or_none(video_info.get('views')),
|
||||
'upload_date': upload_date,
|
||||
'duration': video_info['files'][0]['duration'],
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -122,6 +122,7 @@ DATE_FORMATS = (
|
||||
'%Y %m %d',
|
||||
'%Y-%m-%d',
|
||||
'%Y/%m/%d',
|
||||
'%Y/%m/%d %H:%M',
|
||||
'%Y/%m/%d %H:%M:%S',
|
||||
'%Y-%m-%d %H:%M:%S',
|
||||
'%Y-%m-%d %H:%M:%S.%f',
|
||||
@@ -2409,6 +2410,8 @@ def dfxp2srt(dfxp_data):
|
||||
|
||||
def cli_option(params, command_option, param):
    """Build the argument-list fragment for a valued command-line option.

    Looks up the key ``param`` in the ``params`` mapping. Returns
    ``[command_option, value]`` with the value converted through
    ``compat_str``, or an empty list when the key is absent or maps to
    ``None``.
    """
    value = params.get(param)
    if value is None:
        return []
    # Convert every non-None value, including falsy ones such as 0, so the
    # value is always handed on as text rather than as a raw int/bool.
    return [command_option, compat_str(value)]
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.08.10'
|
||||
__version__ = '2016.08.13'
|
||||
|
||||
Reference in New Issue
Block a user