mirror of
https://source.netsyms.com/Mirrors/youtube-dl
synced 2026-03-30 18:02:21 +00:00
Compare commits
142 Commits
2016.04.19
...
2016.05.01
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
174aba3223 | ||
|
|
4bd143a3a0 | ||
|
|
6f27bf1c74 | ||
|
|
68bb2fef95 | ||
|
|
854cc54bc1 | ||
|
|
651ad35ce0 | ||
|
|
6a0f9a24d0 | ||
|
|
9cf79e8f4b | ||
|
|
2844b09336 | ||
|
|
1a2b377cc2 | ||
|
|
4c1b2e5c0e | ||
|
|
9e1b96ae40 | ||
|
|
fc35cd9e0c | ||
|
|
339fe7228a | ||
|
|
ea7e7fecbd | ||
|
|
d00b93d58c | ||
|
|
93f7a31bf3 | ||
|
|
33a1ec950c | ||
|
|
4e0c0c1508 | ||
|
|
89c0dc9a5f | ||
|
|
f628d800fb | ||
|
|
11fa3d7f99 | ||
|
|
d41ee7b774 | ||
|
|
e0e9bbb0e9 | ||
|
|
7691184a31 | ||
|
|
35cd2f4c25 | ||
|
|
350d7963db | ||
|
|
cbc032c8b7 | ||
|
|
69c4cde4ba | ||
|
|
ca278a182b | ||
|
|
373e1230e4 | ||
|
|
cd63d091ce | ||
|
|
0571ffda7d | ||
|
|
5556047465 | ||
|
|
65a3bfb379 | ||
|
|
cef3f3011f | ||
|
|
e9c6cdf4a1 | ||
|
|
00a17a9e12 | ||
|
|
8312b1a3d1 | ||
|
|
6ff4469528 | ||
|
|
68835d687a | ||
|
|
9d186afac8 | ||
|
|
151d98130b | ||
|
|
b24d6336a7 | ||
|
|
065216d94f | ||
|
|
67167920db | ||
|
|
14638e2915 | ||
|
|
1910077ed7 | ||
|
|
5819edef03 | ||
|
|
f5535ed0e3 | ||
|
|
31ff3c074e | ||
|
|
72670c39de | ||
|
|
683d892bf9 | ||
|
|
497971cd4a | ||
|
|
e757fb3d05 | ||
|
|
0ba9e3ca22 | ||
|
|
4b53762914 | ||
|
|
eebe6b382e | ||
|
|
0cbcbdd89d | ||
|
|
7f776fa4b5 | ||
|
|
eb5ad31ce1 | ||
|
|
a5941305b6 | ||
|
|
f8dddaf456 | ||
|
|
618c71dc64 | ||
|
|
52af8f222b | ||
|
|
3cc8649c9d | ||
|
|
dcf094d626 | ||
|
|
5b5d7cc11e | ||
|
|
2ac2cbc0a3 | ||
|
|
a7e03861e8 | ||
|
|
046ea04a7d | ||
|
|
7464360379 | ||
|
|
175c2e9ec3 | ||
|
|
f1f879098a | ||
|
|
c9fd530670 | ||
|
|
749b0046a8 | ||
|
|
e3de3d6f2f | ||
|
|
ad58942d57 | ||
|
|
4645432d7a | ||
|
|
6bdc2d5358 | ||
|
|
2beff95da5 | ||
|
|
abc1723edd | ||
|
|
b248e6485b | ||
|
|
d6712378e7 | ||
|
|
fb72ec58ae | ||
|
|
c83a352227 | ||
|
|
e9063b5de9 | ||
|
|
594b0c4c69 | ||
|
|
eb9ee19422 | ||
|
|
a1394b820d | ||
|
|
aa9dc24f5a | ||
|
|
51762e1a31 | ||
|
|
8b38f2ac40 | ||
|
|
a82398bd72 | ||
|
|
c14dc00df3 | ||
|
|
03dd60ca41 | ||
|
|
0738187f9b | ||
|
|
a956cb6306 | ||
|
|
a8062eabcd | ||
|
|
2a7dee8cc5 | ||
|
|
d9ed362116 | ||
|
|
4f54958097 | ||
|
|
2a7c38831c | ||
|
|
949b6497cc | ||
|
|
2c21152ca7 | ||
|
|
fda9a1ca9e | ||
|
|
864d5e7231 | ||
|
|
5448b781f6 | ||
|
|
e239413fbc | ||
|
|
fd0ff8bad8 | ||
|
|
397ec446f3 | ||
|
|
29a7e8f6f8 | ||
|
|
eb01e97e10 | ||
|
|
cb7d4d0efd | ||
|
|
c80037918b | ||
|
|
237a41108a | ||
|
|
e962ae15d3 | ||
|
|
7c36ea7d54 | ||
|
|
9260cf1d97 | ||
|
|
bdbb8530c7 | ||
|
|
09a9fadb84 | ||
|
|
bf09af3acb | ||
|
|
88296ac326 | ||
|
|
870d525848 | ||
|
|
6577112890 | ||
|
|
1988647dda | ||
|
|
a292cba256 | ||
|
|
982e518a96 | ||
|
|
748e730099 | ||
|
|
b6c0d4f431 | ||
|
|
acaff49575 | ||
|
|
1da19488f9 | ||
|
|
442c4d361f | ||
|
|
ec59d657e7 | ||
|
|
99ef96f84c | ||
|
|
4dccea8ad0 | ||
|
|
2c0d9c6217 | ||
|
|
12a5134596 | ||
|
|
16e633a5d7 | ||
|
|
494ab6db73 | ||
|
|
107701fcfc | ||
|
|
f77970765a |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.19**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.01**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.04.19
|
||||
[debug] youtube-dl version 2016.05.01
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
1
AUTHORS
1
AUTHORS
@@ -168,3 +168,4 @@ José Joaquín Atria
|
||||
Viťas Strádal
|
||||
Kagami Hiiragi
|
||||
Philip Huppert
|
||||
blahgeek
|
||||
|
||||
22
README.md
22
README.md
@@ -176,7 +176,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected filesize (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
ffmpeg (experimental)
|
||||
ffmpeg
|
||||
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
||||
downloader
|
||||
--hls-use-mpegts Use the mpegts container for HLS videos,
|
||||
allowing to play the video while
|
||||
downloading (some players may not be able
|
||||
@@ -515,6 +517,18 @@ Available for the video that is an episode of some series or programme:
|
||||
- `episode_number`: Number of the video episode within a season
|
||||
- `episode_id`: Id of the video episode
|
||||
|
||||
Available for the media that is a track or a part of a music album:
|
||||
- `track`: Title of the track
|
||||
- `track_number`: Number of the track within an album or a disc
|
||||
- `track_id`: Id of the track
|
||||
- `artist`: Artist(s) of the track
|
||||
- `genre`: Genre(s) of the track
|
||||
- `album`: Title of the album the track belongs to
|
||||
- `album_type`: Type of the album
|
||||
- `album_artist`: List of all artists appeared on the album
|
||||
- `disc_number`: Number of the disc or other physical medium the track belongs to
|
||||
- `release_year`: Year (YYYY) when the album was released
|
||||
|
||||
Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
||||
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
@@ -683,6 +697,10 @@ YouTube changed their playlist format in March 2014 and later on, so you'll need
|
||||
|
||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
||||
|
||||
### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
|
||||
|
||||
Make sure you are not using `-o` with any of these options `-t`, `--title`, `--id`, `-A` or `--auto-number` set in command line or in a configuration file. Remove the latter if any.
|
||||
|
||||
### Do I always have to pass `-citw`?
|
||||
|
||||
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
|
||||
@@ -703,7 +721,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
|
||||
|
||||
### I have downloaded a video but how can I play it?
|
||||
|
||||
Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
|
||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
|
||||
|
||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
||||
|
||||
|
||||
@@ -163,6 +163,7 @@
|
||||
- **defense.gouv.fr**
|
||||
- **democracynow**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **DigitallySpeaking**
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
- **Dotsub**
|
||||
@@ -174,7 +175,6 @@
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
- **DRTV**
|
||||
- **Dump**
|
||||
- **Dumpert**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **dw**
|
||||
@@ -338,13 +338,13 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **Malemotion**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
- **Mgoon**
|
||||
- **MGTV**: 芒果TV
|
||||
- **Minhateca**
|
||||
- **MinistryGrid**
|
||||
- **Minoto**
|
||||
@@ -374,8 +374,8 @@
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
- **muzu.tv**
|
||||
- **Mwave**
|
||||
- **MwaveMeetGreet**
|
||||
- **MySpace**
|
||||
- **MySpace:album**
|
||||
- **MySpass**
|
||||
@@ -413,7 +413,8 @@
|
||||
- **nfl.com**
|
||||
- **nhl.com**
|
||||
- **nhl.com:news**: NHL news
|
||||
- **nhl.com:videocenter**: NHL videocenter category
|
||||
- **nhl.com:videocenter**
|
||||
- **nhl.com:videocenter:category**: NHL videocenter category
|
||||
- **nick.com**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
@@ -461,13 +462,13 @@
|
||||
- **Patreon**
|
||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||
- **pcmag**
|
||||
- **People**
|
||||
- **Periscope**: Periscope
|
||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||
- **phoenix.de**
|
||||
- **Photobucket**
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **PlaysTV**
|
||||
@@ -497,7 +498,6 @@
|
||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
- **radiobremen**
|
||||
@@ -553,7 +553,6 @@
|
||||
- **SenateISVP**
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
||||
- **Shahid**
|
||||
- **Shared**: shared.sx and vivo.sx
|
||||
- **ShareSix**
|
||||
@@ -566,8 +565,6 @@
|
||||
- **smotri:broadcast**: Smotri.com broadcasts
|
||||
- **smotri:community**: Smotri.com community videos
|
||||
- **smotri:user**: Smotri.com user videos
|
||||
- **SnagFilms**
|
||||
- **SnagFilmsEmbed**
|
||||
- **Snotr**
|
||||
- **Sohu**
|
||||
- **soundcloud**
|
||||
@@ -609,6 +606,7 @@
|
||||
- **Syfy**
|
||||
- **SztvHu**
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tapely**
|
||||
- **Tass**
|
||||
- **TDSLifeway**
|
||||
@@ -724,6 +722,8 @@
|
||||
- **Vidzi**
|
||||
- **vier**
|
||||
- **vier:videos**
|
||||
- **ViewLift**
|
||||
- **ViewLiftEmbed**
|
||||
- **Viewster**
|
||||
- **Viidea**
|
||||
- **viki**
|
||||
@@ -755,6 +755,7 @@
|
||||
- **Walla**
|
||||
- **WashingtonPost**
|
||||
- **wat.tv**
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **WDRMaus**: Sendung mit der Maus
|
||||
@@ -774,6 +775,10 @@
|
||||
- **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **xiami:album**: 虾米音乐 - 专辑
|
||||
- **xiami:artist**: 虾米音乐 - 歌手
|
||||
- **xiami:collection**: 虾米音乐 - 精选集
|
||||
- **xiami:song**: 虾米音乐
|
||||
- **XMinus**
|
||||
- **XNXX**
|
||||
- **Xstream**
|
||||
|
||||
@@ -413,6 +413,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||
self.assertEqual(parse_duration('87 Min.'), 5220)
|
||||
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
|
||||
@@ -260,7 +260,9 @@ class YoutubeDL(object):
|
||||
The following options determine which downloader is picked:
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
None or unset for standard (built-in) downloader.
|
||||
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
|
||||
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
|
||||
if True, otherwise use ffmpeg/avconv if False, otherwise
|
||||
use downloader suggested by extractor if None.
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the downloader (see youtube_dl/downloader/common.py):
|
||||
|
||||
@@ -41,9 +41,12 @@ def get_suitable_downloader(info_dict, params={}):
|
||||
if ed.can_download(info_dict):
|
||||
return ed
|
||||
|
||||
if protocol == 'm3u8' and params.get('hls_prefer_native'):
|
||||
if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
|
||||
return HlsFD
|
||||
|
||||
if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
|
||||
return FFmpegFD
|
||||
|
||||
return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ class ArteTvIE(InfoExtractor):
|
||||
|
||||
class ArteTVPlus7IE(InfoExtractor):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&+])'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
|
||||
|
||||
@classmethod
|
||||
def _extract_url_info(cls, url):
|
||||
|
||||
@@ -33,6 +33,7 @@ class CBCIE(InfoExtractor):
|
||||
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
||||
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
||||
'upload_date': '19700101',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
|
||||
@@ -1,13 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,14 +15,14 @@ class CCCIE(InfoExtractor):
|
||||
'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
|
||||
'md5': '3a1eda8f3a29515d27f5adb967d7e740',
|
||||
'info_dict': {
|
||||
'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',
|
||||
'id': '1839',
|
||||
'ext': 'mp4',
|
||||
'title': 'Introduction to Processor Design',
|
||||
'description': 'md5:80be298773966f66d56cb11260b879af',
|
||||
'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
'upload_date': '20131228',
|
||||
'duration': 3660,
|
||||
'timestamp': 1388188800,
|
||||
'duration': 3710,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
|
||||
@@ -34,79 +30,48 @@ class CCCIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id')
|
||||
event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
|
||||
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
|
||||
else:
|
||||
preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1>(.*?)</h1>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<h3>About</h3>(.+?)<h3>',
|
||||
webpage, 'description', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
|
||||
webpage, 'upload date', fatal=False))
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
|
||||
webpage, 'view count', fatal=False))
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li',
|
||||
webpage, 'duration', fatal=False, group='duration'))
|
||||
|
||||
matches = re.finditer(r'''(?xs)
|
||||
<(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
|
||||
<(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
|
||||
<a\s+download\s+href='(?P<http_url>[^']+)'>\s*
|
||||
(?:
|
||||
.*?
|
||||
<a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
|
||||
)?''', webpage)
|
||||
formats = []
|
||||
for m in matches:
|
||||
format = m.group('format')
|
||||
format_id = self._search_regex(
|
||||
r'.*/([a-z0-9_-]+)/[^/]*$',
|
||||
m.group('http_url'), 'format id', default=None)
|
||||
if format_id:
|
||||
format_id = m.group('lang') + '-' + format_id
|
||||
vcodec = 'h264' if 'h264' in format_id else (
|
||||
'none' if format_id in ('mp3', 'opus') else None
|
||||
for recording in event_data.get('recordings', []):
|
||||
recording_url = recording.get('recording_url')
|
||||
if not recording_url:
|
||||
continue
|
||||
language = recording.get('language')
|
||||
folder = recording.get('folder')
|
||||
format_id = None
|
||||
if language:
|
||||
format_id = language
|
||||
if folder:
|
||||
if language:
|
||||
format_id += '-' + folder
|
||||
else:
|
||||
format_id = folder
|
||||
vcodec = 'h264' if 'h264' in folder else (
|
||||
'none' if folder in ('mp3', 'opus') else None
|
||||
)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'format': format,
|
||||
'language': m.group('lang'),
|
||||
'url': m.group('http_url'),
|
||||
'url': recording_url,
|
||||
'width': int_or_none(recording.get('width')),
|
||||
'height': int_or_none(recording.get('height')),
|
||||
'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
|
||||
'language': language,
|
||||
'vcodec': vcodec,
|
||||
'preference': preference(format_id),
|
||||
})
|
||||
|
||||
if m.group('torrent_url'):
|
||||
formats.append({
|
||||
'format_id': 'torrent-%s' % (format if format_id is None else format_id),
|
||||
'format': '%s (torrent)' % format,
|
||||
'proto': 'torrent',
|
||||
'format_note': '(unsupported; will just download the .torrent file)',
|
||||
'vcodec': vcodec,
|
||||
'preference': -100 + preference(format_id),
|
||||
'url': m.group('torrent_url'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'id': event_id,
|
||||
'display_id': display_id,
|
||||
'title': event_data['title'],
|
||||
'description': event_data.get('description'),
|
||||
'thumbnail': event_data.get('thumb_url'),
|
||||
'timestamp': parse_iso8601(event_data.get('date')),
|
||||
'duration': int_or_none(event_data.get('length')),
|
||||
'tags': event_data.get('tags'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ from ..utils import (
|
||||
class CloudyIE(InfoExtractor):
|
||||
_IE_DESC = 'cloudy.ec and videoraj.ch'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/
|
||||
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
|
||||
(?:v/|embed\.php\?id=)
|
||||
(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
@@ -37,7 +37,7 @@ class CloudyIE(InfoExtractor):
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.videoraj.ch/v/47f399fd8bb60',
|
||||
'url': 'http://www.videoraj.to/v/47f399fd8bb60',
|
||||
'md5': '7d0f8799d91efd4eda26587421c3c3b0',
|
||||
'info_dict': {
|
||||
'id': '47f399fd8bb60',
|
||||
|
||||
@@ -382,7 +382,7 @@ class InfoExtractor(object):
|
||||
else:
|
||||
if query:
|
||||
url_or_request = update_url_query(url_or_request, query)
|
||||
if data or headers:
|
||||
if data is not None or headers:
|
||||
url_or_request = sanitized_Request(url_or_request, data, headers)
|
||||
try:
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
@@ -1006,6 +1006,13 @@ class InfoExtractor(object):
|
||||
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
|
||||
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
|
||||
if akamai_pv is not None and ';' in akamai_pv.text:
|
||||
playerVerificationChallenge = akamai_pv.text.split(';')[0]
|
||||
if playerVerificationChallenge.strip() != '':
|
||||
return []
|
||||
|
||||
formats = []
|
||||
manifest_version = '1.0'
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
|
||||
@@ -1054,7 +1061,7 @@ class InfoExtractor(object):
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True):
|
||||
fatal=True, live=False):
|
||||
|
||||
formats = [{
|
||||
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||
@@ -1132,7 +1139,11 @@ class InfoExtractor(object):
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
|
||||
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
if not live:
|
||||
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(line.strip()),
|
||||
|
||||
@@ -11,7 +11,6 @@ from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -27,6 +26,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
extract_attributes,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
@@ -306,28 +306,36 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
|
||||
'video_uploader', fatal=False)
|
||||
|
||||
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
||||
playerdata_req = sanitized_Request(playerdata_url)
|
||||
playerdata_req.data = urlencode_postdata({'current_page': webpage_url})
|
||||
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
|
||||
|
||||
stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
|
||||
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
|
||||
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
video_encode_ids = []
|
||||
formats = []
|
||||
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
||||
for fmt in available_fmts:
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
streamdata_req = sanitized_Request(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
|
||||
% (stream_id, stream_format, stream_quality),
|
||||
% (video_id, stream_format, stream_quality),
|
||||
compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
|
||||
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
streamdata = self._download_xml(
|
||||
streamdata_req, video_id,
|
||||
note='Downloading media info for %s' % video_format)
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
video_play_path = xpath_text(stream_info, './file')
|
||||
if not video_url or not video_play_path:
|
||||
@@ -359,6 +367,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._download_xml(
|
||||
'http://www.crunchyroll.com/xml', video_id,
|
||||
note='Downloading media info', query={
|
||||
'req': 'RpcApiVideoPlayer_GetMediaMetadata',
|
||||
'media_id': video_id,
|
||||
})
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
@@ -366,9 +382,12 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'thumbnail': xpath_text(metadata, 'episode_image_url'),
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'series': xpath_text(metadata, 'series_title'),
|
||||
'episode': xpath_text(metadata, 'episode_title'),
|
||||
'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/shows/(?:[^/]+/){2}\?play=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'info_dict': {
|
||||
@@ -48,6 +48,9 @@ class CWTVIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -12,39 +12,46 @@ class DFBIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
|
||||
# The md5 is different each time
|
||||
'md5': 'ac0f98a52a330f700b4b3034ad240649',
|
||||
'info_dict': {
|
||||
'id': '11633',
|
||||
'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
|
||||
'upload_date': '20150714',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_info = self._download_xml(
|
||||
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
|
||||
display_id)
|
||||
video_info = player_info.find('video')
|
||||
stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
|
||||
|
||||
f4m_info = self._download_xml(
|
||||
self._proto_relative_url(video_info.find('url').text.strip()), display_id)
|
||||
token_el = f4m_info.find('token')
|
||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
||||
formats = self._extract_f4m_formats(manifest_url, display_id)
|
||||
formats = []
|
||||
# see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
|
||||
for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
|
||||
stream_access_info = self._download_xml(sa_url, display_id)
|
||||
token_el = stream_access_info.find('token')
|
||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth']
|
||||
if '.f4m' in manifest_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url + '&hdcore=3.2.0',
|
||||
display_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, display_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_info.find('title').text,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
|
||||
'upload_date': unified_strdate(video_info.find('time_date').text),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ class DiscoveryIE(InfoExtractor):
|
||||
'duration': 156,
|
||||
'timestamp': 1302032462,
|
||||
'upload_date': '20110405',
|
||||
'uploader_id': '103207',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
@@ -54,7 +55,11 @@ class DiscoveryIE(InfoExtractor):
|
||||
'upload_date': '20140725',
|
||||
'timestamp': 1406246400,
|
||||
'duration': 116,
|
||||
'uploader_id': '103207',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -66,13 +71,19 @@ class DiscoveryIE(InfoExtractor):
|
||||
entries = []
|
||||
|
||||
for idx, video_info in enumerate(info['playlist']):
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
|
||||
note='Download m3u8 information for video %d' % (idx + 1))
|
||||
self._sort_formats(formats)
|
||||
subtitles = []
|
||||
caption_url = video_info.get('captionsUrl')
|
||||
if caption_url:
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': caption_url,
|
||||
}]
|
||||
}
|
||||
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
|
||||
'id': compat_str(video_info['id']),
|
||||
'formats': formats,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'duration': parse_duration(video_info.get('video_length')),
|
||||
@@ -80,6 +91,7 @@ class DiscoveryIE(InfoExtractor):
|
||||
'thumbnail': video_info.get('thumbnailURL'),
|
||||
'alt_title': video_info.get('secondary_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('publishedDate')),
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, display_id, video_title)
|
||||
|
||||
114
youtube_dl/extractor/dispeak.py
Normal file
114
youtube_dl/extractor/dispeak.py
Normal file
@@ -0,0 +1,114 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class DigitallySpeakingIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
|
||||
|
||||
_TESTS = [{
|
||||
# From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
|
||||
'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
|
||||
'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
|
||||
'info_dict': {
|
||||
'id': '840376_BQRC',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tenacious Design and The Interface of \'Destiny\'',
|
||||
},
|
||||
}, {
|
||||
# From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
|
||||
'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_mp4(self, metadata):
|
||||
video_formats = []
|
||||
video_root = None
|
||||
|
||||
mp4_video = xpath_text(metadata, './mp4video', default=None)
|
||||
if mp4_video is not None:
|
||||
mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
|
||||
video_root = mobj.group('root')
|
||||
if video_root is None:
|
||||
http_host = xpath_text(metadata, 'httpHost', default=None)
|
||||
if http_host:
|
||||
video_root = 'http://%s/' % http_host
|
||||
if video_root is None:
|
||||
# Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js
|
||||
# Works for GPUTechConf, too
|
||||
video_root = 'http://s3-2u.digitallyspeaking.com/'
|
||||
|
||||
formats = metadata.findall('./MBRVideos/MBRVideo')
|
||||
if not formats:
|
||||
return None
|
||||
for a_format in formats:
|
||||
stream_name = xpath_text(a_format, 'streamName', fatal=True)
|
||||
video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
|
||||
url = video_root + video_path
|
||||
vbr = xpath_text(a_format, 'bitrate')
|
||||
video_formats.append({
|
||||
'url': url,
|
||||
'vbr': int_or_none(vbr),
|
||||
})
|
||||
return video_formats
|
||||
|
||||
def _parse_flv(self, metadata):
|
||||
formats = []
|
||||
akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
|
||||
audios = metadata.findall('./audios/audio')
|
||||
for audio in audios:
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(audio.get('url'), '.flv'),
|
||||
'ext': 'flv',
|
||||
'vcodec': 'none',
|
||||
'format_id': audio.get('code'),
|
||||
})
|
||||
slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(slide_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'slide deck video',
|
||||
'quality': -2,
|
||||
'preference': -2,
|
||||
'format_id': 'slides',
|
||||
})
|
||||
speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'speaker video',
|
||||
'quality': -1,
|
||||
'preference': -1,
|
||||
'format_id': 'speaker',
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
xml_description = self._download_xml(url, video_id)
|
||||
metadata = xpath_element(xml_description, 'metadata')
|
||||
|
||||
video_formats = self._parse_mp4(metadata)
|
||||
if video_formats is None:
|
||||
video_formats = self._parse_flv(metadata)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': video_formats,
|
||||
'title': xpath_text(metadata, 'title', fatal=True),
|
||||
'duration': parse_duration(xpath_text(metadata, 'endTime')),
|
||||
'creator': xpath_text(metadata, 'speaker'),
|
||||
}
|
||||
@@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'display_id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:f34981259a03e980a3c6404190a3ed61',
|
||||
'description': 're:.*m7show@163\.com.*',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
@@ -43,7 +43,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Romm not found',
|
||||
'skip': 'Room not found',
|
||||
}, {
|
||||
'url': 'http://www.douyutv.com/17732',
|
||||
'info_dict': {
|
||||
@@ -51,7 +51,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'display_id': '17732',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:f34981259a03e980a3c6404190a3ed61',
|
||||
'description': 're:.*m7show@163\.com.*',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
@@ -75,13 +75,28 @@ class DouyuTVIE(InfoExtractor):
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||
|
||||
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
|
||||
room_id, int(time.time()))
|
||||
config = None
|
||||
# Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache"
|
||||
# Retry with different parameters - same parameters cause same errors
|
||||
for i in range(5):
|
||||
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
|
||||
room_id, int(time.time()))
|
||||
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
|
||||
|
||||
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
|
||||
config = self._download_json(
|
||||
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
|
||||
video_id)
|
||||
config_page = self._download_webpage(
|
||||
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
|
||||
video_id)
|
||||
try:
|
||||
config = self._parse_json(config_page, video_id, fatal=False)
|
||||
except ExtractorError:
|
||||
# Wait some time before retrying to get a different time() value
|
||||
self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. '
|
||||
'Waiting for %(timeout)s seconds before retrying')
|
||||
continue
|
||||
else:
|
||||
break
|
||||
if config is None:
|
||||
raise ExtractorError('Unable to fetch API result')
|
||||
|
||||
data = config['data']
|
||||
|
||||
|
||||
@@ -6,13 +6,18 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# geo restricted, via direct unsigned hls URL
|
||||
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
|
||||
'info_dict': {
|
||||
'id': '1255600',
|
||||
@@ -31,11 +36,12 @@ class DPlayIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# non geo restricted, via secure api, unsigned download hls URL
|
||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
||||
'info_dict': {
|
||||
'id': '3172',
|
||||
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Svensken lär sig njuta av livet',
|
||||
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
||||
'duration': 2650,
|
||||
@@ -48,23 +54,25 @@ class DPlayIE(InfoExtractor):
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
# geo restricted, via secure api, unsigned download hls URL
|
||||
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
|
||||
'info_dict': {
|
||||
'id': '70816',
|
||||
'display_id': 'season-6-episode-12',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 12',
|
||||
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
|
||||
'duration': 2563,
|
||||
'timestamp': 1429696800,
|
||||
'upload_date': '20150422',
|
||||
'creator': 'Kanal 4',
|
||||
'creator': 'Kanal 4 (Home)',
|
||||
'series': 'Mig og min mor',
|
||||
'season_number': 6,
|
||||
'episode_number': 12,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
# geo restricted, via direct unsigned hls URL
|
||||
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
@@ -90,17 +98,24 @@ class DPlayIE(InfoExtractor):
|
||||
|
||||
def extract_formats(protocol, manifest_url):
|
||||
if protocol == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
manifest_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False))
|
||||
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
|
||||
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
|
||||
# ourselves
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
|
||||
for m3u8_format in m3u8_formats:
|
||||
m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
|
||||
formats.extend(m3u8_formats)
|
||||
elif protocol == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
|
||||
video_id, f4m_id=protocol, fatal=False))
|
||||
|
||||
domain_tld = domain.split('.')[-1]
|
||||
if domain_tld in ('se', 'dk'):
|
||||
if domain_tld in ('se', 'dk', 'no'):
|
||||
for protocol in PROTOCOLS:
|
||||
# Providing dsc-geo allows to bypass geo restriction in some cases
|
||||
self._set_cookie(
|
||||
'secure.dplay.%s' % domain_tld, 'dsc-geo',
|
||||
json.dumps({
|
||||
@@ -113,13 +128,24 @@ class DPlayIE(InfoExtractor):
|
||||
'Downloading %s stream JSON' % protocol, fatal=False)
|
||||
if stream and stream.get(protocol):
|
||||
extract_formats(protocol, stream[protocol])
|
||||
else:
|
||||
|
||||
# The last resort is to try direct unsigned hls/hds URLs from info dictionary.
|
||||
# Sometimes this does work even when secure API with dsc-geo has failed (e.g.
|
||||
# http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
|
||||
if not formats:
|
||||
for protocol in PROTOCOLS:
|
||||
if info.get(protocol):
|
||||
extract_formats(protocol, info[protocol])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for lang in ('se', 'sv', 'da', 'nl', 'no'):
|
||||
for format_id in ('web_vtt', 'vtt', 'srt'):
|
||||
subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@@ -133,4 +159,5 @@ class DPlayIE(InfoExtractor):
|
||||
'episode_number': int_or_none(info.get('episode')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class DumpIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.dump.com/oneus/',
|
||||
'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99',
|
||||
'info_dict': {
|
||||
'id': 'oneus',
|
||||
'ext': 'flv',
|
||||
'title': "He's one of us.",
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
@@ -8,6 +8,7 @@ from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,7 +23,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# http://lenta.ru/news/2015/03/06/navalny/
|
||||
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
|
||||
'md5': '70f5187fb620f2c1d503b3b22fd4efe3',
|
||||
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
@@ -37,7 +38,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
# http://muz-tv.ru/play/7129/
|
||||
# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
|
||||
'url': 'eagleplatform:media.clipyou.ru:12820',
|
||||
'md5': '90b26344ba442c8e44aa4cf8f301164a',
|
||||
'md5': '358597369cf8ba56675c1df15e7af624',
|
||||
'info_dict': {
|
||||
'id': '12820',
|
||||
'ext': 'mp4',
|
||||
@@ -90,17 +91,33 @@ class EaglePlatformIE(InfoExtractor):
|
||||
|
||||
secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id,
|
||||
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
mp4_url = self._get_video_url(
|
||||
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
|
||||
# http://lentaru.media.eagleplatform.com/player/player.js
|
||||
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
|
||||
video_id, 'Downloading mp4 JSON')
|
||||
formats.append({'url': mp4_url, 'format_id': 'mp4'})
|
||||
mp4_url_basename = url_basename(mp4_url)
|
||||
for m3u8_format in m3u8_formats:
|
||||
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
|
||||
if mobj:
|
||||
http_format = m3u8_format.copy()
|
||||
video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
|
||||
if not self._is_valid_url(video_url, video_id):
|
||||
continue
|
||||
http_format.update({
|
||||
'url': video_url,
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(http_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -193,10 +193,10 @@ from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import DRTVIE
|
||||
from .dvtv import DVTVIE
|
||||
from .dump import DumpIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .dropbox import DropboxIE
|
||||
from .dw import (
|
||||
DWIE,
|
||||
@@ -400,12 +400,12 @@ from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .mgtv import MGTVIE
|
||||
from .minhateca import MinhatecaIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
@@ -438,8 +438,7 @@ from .mtv import (
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .mwave import MwaveIE
|
||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvi import MyviIE
|
||||
@@ -489,9 +488,10 @@ from .nextmovie import NextMovieIE
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import (
|
||||
NHLIE,
|
||||
NHLNewsIE,
|
||||
NHLVideocenterIE,
|
||||
NHLNewsIE,
|
||||
NHLVideocenterCategoryIE,
|
||||
NHLIE,
|
||||
)
|
||||
from .nick import NickIE
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
@@ -559,12 +559,12 @@ from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .people import PeopleIE
|
||||
from .periscope import PeriscopeIE
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
@@ -600,7 +600,6 @@ from .qqmusic import (
|
||||
QQMusicToplistIE,
|
||||
QQMusicPlaylistIE,
|
||||
)
|
||||
from .quickvid import QuickVidIE
|
||||
from .r7 import R7IE
|
||||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
@@ -658,7 +657,6 @@ from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .sexykarma import SexyKarmaIE
|
||||
from .shahid import ShahidIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
@@ -675,10 +673,6 @@ from .smotri import (
|
||||
SmotriUserIE,
|
||||
SmotriBroadcastIE,
|
||||
)
|
||||
from .snagfilms import (
|
||||
SnagFilmsIE,
|
||||
SnagFilmsEmbedIE,
|
||||
)
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .soundcloud import (
|
||||
@@ -730,7 +724,10 @@ from .svt import (
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .tagesschau import (
|
||||
TagesschauPlayerIE,
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
@@ -881,6 +878,10 @@ from .vidme import (
|
||||
)
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewlift import (
|
||||
ViewLiftIE,
|
||||
ViewLiftEmbedIE,
|
||||
)
|
||||
from .viewster import ViewsterIE
|
||||
from .viidea import ViideaIE
|
||||
from .vimeo import (
|
||||
@@ -919,6 +920,7 @@ from .vulture import VultureIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .watchindianporn import WatchIndianPornIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
@@ -942,6 +944,12 @@ from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
)
|
||||
from .xiami import (
|
||||
XiamiSongIE,
|
||||
XiamiAlbumIE,
|
||||
XiamiArtistIE,
|
||||
XiamiCollectionIE
|
||||
)
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
@@ -27,6 +31,7 @@ class FunimationIE(InfoExtractor):
|
||||
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
|
||||
}, {
|
||||
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
|
||||
'info_dict': {
|
||||
@@ -37,6 +42,7 @@ class FunimationIE(InfoExtractor):
|
||||
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||
}, {
|
||||
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
|
||||
'info_dict': {
|
||||
@@ -47,8 +53,36 @@ class FunimationIE(InfoExtractor):
|
||||
'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
|
||||
'thumbnail': 're:https?://.*\.(?:jpg|png)',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'http://www.funimation.com/login'
|
||||
|
||||
def _download_webpage(self, *args, **kwargs):
|
||||
try:
|
||||
return super(FunimationIE, self)._download_webpage(*args, **kwargs)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
response = ee.cause.read()
|
||||
if b'>Please complete the security check to access<' in response:
|
||||
raise ExtractorError(
|
||||
'Access to funimation.com is blocked by CloudFlare. '
|
||||
'Please browse to http://www.funimation.com/, solve '
|
||||
'the reCAPTCHA, export browser cookies to a text file,'
|
||||
' and then try again with --cookies YOUR_COOKIE_FILE.',
|
||||
expected=True)
|
||||
raise
|
||||
|
||||
def _extract_cloudflare_session_ua(self, url):
|
||||
ci_session_cookie = self._get_cookies(url).get('ci_session')
|
||||
if ci_session_cookie:
|
||||
ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
|
||||
# ci_session is a string serialized by PHP function serialize()
|
||||
# This case is simple enough to use regular expressions only
|
||||
return self._search_regex(
|
||||
r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
|
||||
default=None)
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
@@ -57,8 +91,11 @@ class FunimationIE(InfoExtractor):
|
||||
'email_field': username,
|
||||
'password_field': password,
|
||||
})
|
||||
login_request = sanitized_Request('http://www.funimation.com/login', data, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0',
|
||||
user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
|
||||
if not user_agent:
|
||||
user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
|
||||
login_request = sanitized_Request(self._LOGIN_URL, data, headers={
|
||||
'User-Agent': user_agent,
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
login_page = self._download_webpage(
|
||||
@@ -103,11 +140,16 @@ class FunimationIE(InfoExtractor):
|
||||
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
|
||||
)
|
||||
|
||||
user_agent = self._extract_cloudflare_session_ua(url)
|
||||
if user_agent:
|
||||
USER_AGENTS = ((None, user_agent),)
|
||||
|
||||
for kind, user_agent in USER_AGENTS:
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('User-Agent', user_agent)
|
||||
webpage = self._download_webpage(
|
||||
request, display_id, 'Downloading %s webpage' % kind)
|
||||
request, display_id,
|
||||
'Downloading %s webpage' % kind if kind else 'Downloading webpage')
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
|
||||
@@ -24,15 +24,12 @@ class GazetaIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
|
||||
'md5': '37f19f78355eb2f4256ee1688359f24c',
|
||||
'info_dict': {
|
||||
'id': '252048',
|
||||
'ext': 'mp4',
|
||||
'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['EaglePlatform'],
|
||||
}]
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
@@ -51,63 +50,33 @@ class GDCVaultIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://gdcvault.com/play/1020791/',
|
||||
'only_matching': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
# Hard-coded hostname
|
||||
'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
|
||||
'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
|
||||
'info_dict': {
|
||||
'id': '1023460',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'Tenacious-Design-and-The-Interface',
|
||||
'title': 'Tenacious Design and The Interface of \'Destiny\'',
|
||||
},
|
||||
},
|
||||
{
|
||||
# Multiple audios
|
||||
'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
|
||||
'info_dict': {
|
||||
'id': '1014631',
|
||||
'ext': 'flv',
|
||||
'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
'format': 'jp', # The japanese audio
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _parse_mp4(self, xml_description):
|
||||
video_formats = []
|
||||
mp4_video = xml_description.find('./metadata/mp4video')
|
||||
if mp4_video is None:
|
||||
return None
|
||||
|
||||
mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
|
||||
video_root = mobj.group('root')
|
||||
formats = xml_description.findall('./metadata/MBRVideos/MBRVideo')
|
||||
for format in formats:
|
||||
mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text)
|
||||
url = video_root + mobj.group('path')
|
||||
vbr = format.find('bitrate').text
|
||||
video_formats.append({
|
||||
'url': url,
|
||||
'vbr': int(vbr),
|
||||
})
|
||||
return video_formats
|
||||
|
||||
def _parse_flv(self, xml_description):
|
||||
formats = []
|
||||
akamai_url = xml_description.find('./metadata/akamaiHost').text
|
||||
audios = xml_description.find('./metadata/audios')
|
||||
if audios is not None:
|
||||
for audio in audios:
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(audio.get('url'), '.flv'),
|
||||
'ext': 'flv',
|
||||
'vcodec': 'none',
|
||||
'format_id': audio.get('code'),
|
||||
})
|
||||
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(slide_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'slide deck video',
|
||||
'quality': -2,
|
||||
'preference': -2,
|
||||
'format_id': 'slides',
|
||||
})
|
||||
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'speaker video',
|
||||
'quality': -1,
|
||||
'preference': -1,
|
||||
'format_id': 'speaker',
|
||||
})
|
||||
return formats
|
||||
|
||||
def _login(self, webpage_url, display_id):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None or password is None:
|
||||
@@ -183,17 +152,10 @@ class GDCVaultIE(InfoExtractor):
|
||||
r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
|
||||
start_page, 'xml filename')
|
||||
|
||||
xml_description = self._download_xml(
|
||||
'%s/xml/%s' % (xml_root, xml_name), display_id)
|
||||
|
||||
video_title = xml_description.find('./metadata/title').text
|
||||
video_formats = self._parse_mp4(xml_description)
|
||||
if video_formats is None:
|
||||
video_formats = self._parse_flv(xml_description)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_title,
|
||||
'formats': video_formats,
|
||||
'url': '%s/xml/%s' % (xml_root, xml_name),
|
||||
'ie_key': 'DigitallySpeaking',
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .snagfilms import SnagFilmsEmbedIE
|
||||
from .viewlift import ViewLiftEmbedIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
from .mtv import MTVServicesEmbeddedIE
|
||||
from .pladform import PladformIE
|
||||
@@ -237,6 +237,7 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'car-20120827-manifest',
|
||||
'formats': 'mincount:9',
|
||||
'upload_date': '20130904',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
@@ -596,7 +597,11 @@ class GenericIE(InfoExtractor):
|
||||
'id': 'k2mm4bCdJ6CQ2i7c8o2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
|
||||
'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
|
||||
'uploader': 'Spi0n',
|
||||
'uploader_id': 'xgditw',
|
||||
'upload_date': '20140425',
|
||||
'timestamp': 1398441542,
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
},
|
||||
@@ -729,8 +734,11 @@ class GenericIE(InfoExtractor):
|
||||
'id': 'uxjb0lwrcz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
|
||||
'description': 'a Martin Fowler video from ThoughtWorks',
|
||||
'duration': 1715.0,
|
||||
'uploader': 'thoughtworks.wistia.com',
|
||||
'upload_date': '20140603',
|
||||
'timestamp': 1401832161,
|
||||
},
|
||||
},
|
||||
# Soundcloud embed
|
||||
@@ -879,6 +887,7 @@ class GenericIE(InfoExtractor):
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
@@ -893,6 +902,7 @@ class GenericIE(InfoExtractor):
|
||||
# ClipYou (Eagle.Platform) embed (custom URL)
|
||||
{
|
||||
'url': 'http://muz-tv.ru/play/7129/',
|
||||
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||
'info_dict': {
|
||||
'id': '12820',
|
||||
'ext': 'mp4',
|
||||
@@ -981,6 +991,9 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': "PFT Live: New leader in the 'new-look' defense",
|
||||
'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'upload_date': '20140107',
|
||||
'timestamp': 1389118457,
|
||||
},
|
||||
},
|
||||
# UDN embed
|
||||
@@ -1033,6 +1046,9 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'SN Presents: Russell Martin, World Citizen',
|
||||
'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
|
||||
'uploader': 'Rogers Sportsnet',
|
||||
'uploader_id': '1704050871',
|
||||
'upload_date': '20150525',
|
||||
'timestamp': 1432570283,
|
||||
},
|
||||
},
|
||||
# Dailymotion Cloud video
|
||||
@@ -1124,6 +1140,9 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'The Cardinal Pell Interview',
|
||||
'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
|
||||
'uploader': 'GlobeCast Australia - GlobeStream',
|
||||
'uploader_id': '2733773828001',
|
||||
'upload_date': '20160304',
|
||||
'timestamp': 1457083087,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
@@ -1905,10 +1924,10 @@ class GenericIE(InfoExtractor):
|
||||
if onionstudios_url:
|
||||
return self.url_result(onionstudios_url)
|
||||
|
||||
# Look for SnagFilms embeds
|
||||
snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
|
||||
if snagfilms_url:
|
||||
return self.url_result(snagfilms_url)
|
||||
# Look for ViewLift embeds
|
||||
viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
|
||||
if viewlift_url:
|
||||
return self.url_result(viewlift_url)
|
||||
|
||||
# Look for JWPlatform embeds
|
||||
jwplatform_url = JWPlatformIE._extract_url(webpage)
|
||||
@@ -2045,6 +2064,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for video_url in found:
|
||||
video_url = unescapeHTML(video_url)
|
||||
video_url = video_url.replace('\\/', '/')
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
|
||||
|
||||
@@ -2,12 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class GPUTechConfIE(InfoExtractor):
|
||||
@@ -27,29 +21,15 @@ class GPUTechConfIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/')
|
||||
xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
|
||||
|
||||
doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id)
|
||||
|
||||
metadata = xpath_element(doc, 'metadata')
|
||||
http_host = xpath_text(metadata, 'httpHost', 'http host', True)
|
||||
mbr_videos = xpath_element(metadata, 'MBRVideos')
|
||||
|
||||
formats = []
|
||||
for mbr_video in mbr_videos.findall('MBRVideo'):
|
||||
stream_name = xpath_text(mbr_video, 'streamName')
|
||||
if stream_name:
|
||||
formats.append({
|
||||
'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')),
|
||||
'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
root_path = self._search_regex(
|
||||
r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path',
|
||||
default='http://evt.dispeak.com/nvidia/events/gtc15/')
|
||||
xml_file_id = self._search_regex(
|
||||
r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': xpath_text(metadata, 'title'),
|
||||
'duration': parse_duration(xpath_text(metadata, 'endTime')),
|
||||
'creator': xpath_text(metadata, 'speaker'),
|
||||
'formats': formats,
|
||||
'url': '%sxml/%s.xml' % (root_path, xml_file_id),
|
||||
'ie_key': 'DigitallySpeaking',
|
||||
}
|
||||
|
||||
@@ -287,6 +287,13 @@ class IqiyiIE(InfoExtractor):
|
||||
('10', 'h1'),
|
||||
]
|
||||
|
||||
AUTH_API_ERRORS = {
|
||||
# No preview available (不允许试看鉴权失败)
|
||||
'Q00505': 'This video requires a VIP account',
|
||||
# End of preview time (试看结束鉴权失败)
|
||||
'Q00506': 'Needs a VIP account for full video',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@@ -372,14 +379,18 @@ class IqiyiIE(InfoExtractor):
|
||||
note='Downloading video authentication JSON',
|
||||
errnote='Unable to download video authentication JSON')
|
||||
|
||||
if auth_result['code'] == 'Q00505': # No preview available (不允许试看鉴权失败)
|
||||
raise ExtractorError('This video requires a VIP account', expected=True)
|
||||
if auth_result['code'] == 'Q00506': # End of preview time (试看结束鉴权失败)
|
||||
code = auth_result.get('code')
|
||||
msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code
|
||||
if code == 'Q00506':
|
||||
if do_report_warning:
|
||||
self.report_warning('Needs a VIP account for full video')
|
||||
self.report_warning(msg)
|
||||
return False
|
||||
if 'data' not in auth_result:
|
||||
if msg is not None:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True)
|
||||
raise ExtractorError('Unexpected error from Iqiyi auth API')
|
||||
|
||||
return auth_result
|
||||
return auth_result['data']
|
||||
|
||||
def construct_video_urls(self, data, video_id, _uuid, tvid):
|
||||
def do_xor(x, y):
|
||||
@@ -455,11 +466,11 @@ class IqiyiIE(InfoExtractor):
|
||||
need_vip_warning_report = False
|
||||
break
|
||||
param.update({
|
||||
't': auth_result['data']['t'],
|
||||
't': auth_result['t'],
|
||||
# cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
|
||||
'cid': 'afbe8fd3d73448c9',
|
||||
'vid': video_id,
|
||||
'QY00001': auth_result['data']['u'],
|
||||
'QY00001': auth_result['u'],
|
||||
})
|
||||
api_video_url += '?' if '?' not in api_video_url else '&'
|
||||
api_video_url += compat_urllib_parse_urlencode(param)
|
||||
|
||||
@@ -81,7 +81,7 @@ class KuwoIE(KuwoBaseIE):
|
||||
'id': '6446136',
|
||||
'ext': 'mp3',
|
||||
'title': '心',
|
||||
'description': 'md5:b2ab6295d014005bfc607525bfc1e38a',
|
||||
'description': 'md5:5d0e947b242c35dc0eb1d2fce9fbf02c',
|
||||
'creator': 'IU',
|
||||
'upload_date': '20150518',
|
||||
},
|
||||
@@ -102,10 +102,10 @@ class KuwoIE(KuwoBaseIE):
|
||||
raise ExtractorError('this song has been offline because of copyright issues', expected=True)
|
||||
|
||||
song_name = self._html_search_regex(
|
||||
r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage, 'song name')
|
||||
singer_name = self._html_search_regex(
|
||||
r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
|
||||
webpage, 'singer name', fatal=False)
|
||||
r'<p[^>]+id="lrcName">([^<]+)</p>', webpage, 'song name')
|
||||
singer_name = remove_start(self._html_search_regex(
|
||||
r'<a[^>]+href="http://www\.kuwo\.cn/artist/content\?name=([^"]+)">',
|
||||
webpage, 'singer name', fatal=False), '歌手')
|
||||
lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
|
||||
if lrc_content == '暂无': # indicates no lyrics
|
||||
lrc_content = None
|
||||
@@ -114,7 +114,7 @@ class KuwoIE(KuwoBaseIE):
|
||||
self._sort_formats(formats)
|
||||
|
||||
album_id = self._html_search_regex(
|
||||
r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
|
||||
r'<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
|
||||
webpage, 'album id', fatal=False)
|
||||
|
||||
publish_time = None
|
||||
@@ -268,7 +268,7 @@ class KuwoCategoryIE(InfoExtractor):
|
||||
'title': '八十年代精选',
|
||||
'description': '这些都是属于八十年代的回忆!',
|
||||
},
|
||||
'playlist_count': 24,
|
||||
'playlist_mincount': 24,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class MalemotionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
|
||||
_TEST = {
|
||||
'url': 'http://malemotion.com/video/bete-de-concours.ltc',
|
||||
'md5': '3013e53a0afbde2878bc39998c33e8a5',
|
||||
'info_dict': {
|
||||
'id': 'ltc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bête de Concours',
|
||||
'age_limit': 18,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(.*?)</title', webpage, 'title')
|
||||
video_thumbnail = self._search_regex(
|
||||
r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': 'mp4',
|
||||
'preference': 1,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': 18,
|
||||
}
|
||||
@@ -81,6 +81,9 @@ class MetacafeIE(InfoExtractor):
|
||||
'title': 'Open: This is Face the Nation, February 9',
|
||||
'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
|
||||
'duration': 96,
|
||||
'uploader': 'CBSI-NEW',
|
||||
'upload_date': '20140209',
|
||||
'timestamp': 1391959800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
|
||||
63
youtube_dl/extractor/mgtv.py
Normal file
63
youtube_dl/extractor/mgtv.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '3116640',
|
||||
'ext': 'mp4',
|
||||
'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
|
||||
'description': '我是歌手第四季双年巅峰会',
|
||||
'duration': 7461,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}
|
||||
|
||||
_FORMAT_MAP = {
|
||||
'标清': ('Standard', 0),
|
||||
'高清': ('High', 1),
|
||||
'超清': ('SuperHigh', 2),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
api_data = self._download_json(
|
||||
'http://v.api.mgtv.com/player/video', video_id,
|
||||
query={'video_id': video_id})['data']
|
||||
info = api_data['info']
|
||||
|
||||
formats = []
|
||||
for idx, stream in enumerate(api_data['stream']):
|
||||
format_name = stream.get('name')
|
||||
format_id, preference = self._FORMAT_MAP.get(format_name, (None, None))
|
||||
format_info = self._download_json(
|
||||
stream['url'], video_id,
|
||||
note='Download video info for format %s' % format_id or '#%d' % idx)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_info['info'],
|
||||
'ext': 'mp4', # These are m3u8 playlists
|
||||
'preference': preference,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'].strip(),
|
||||
'formats': formats,
|
||||
'description': info.get('desc'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'thumbnail': info.get('thumb'),
|
||||
}
|
||||
@@ -15,9 +15,9 @@ class MiTeleIE(InfoExtractor):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||
|
||||
_TESTS = [{
|
||||
_TEST = {
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||
'md5': '0ff1a13aebb35d9bc14081ff633dd324',
|
||||
# MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
||||
'display_id': 'programa-144',
|
||||
@@ -27,7 +27,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
'duration': 2913,
|
||||
},
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class MuzuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||
IE_NAME = 'muzu.tv'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
|
||||
'md5': '98f8b2c7bc50578d6a0364fff2bfb000',
|
||||
'info_dict': {
|
||||
'id': '1981454',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cat Walk (Original Mix)',
|
||||
'description': 'md5:90e868994de201b2570e4e5854e19420',
|
||||
'uploader': 'MarcAshken featuring SOS',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
info_data = compat_urllib_parse_urlencode({
|
||||
'format': 'json',
|
||||
'url': url,
|
||||
})
|
||||
info = self._download_json(
|
||||
'http://www.muzu.tv/api/oembed/?%s' % info_data,
|
||||
video_id, 'Downloading video info')
|
||||
|
||||
player_info = self._download_json(
|
||||
'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
|
||||
video_id, 'Downloading player info')
|
||||
video_info = player_info['videos'][0]
|
||||
for quality in ['1080', '720', '480', '360']:
|
||||
if video_info.get('v%s' % quality):
|
||||
break
|
||||
|
||||
data = compat_urllib_parse_urlencode({
|
||||
'ai': video_id,
|
||||
# Even if each time you watch a video the hash changes,
|
||||
# it seems to work for different videos, and it will work
|
||||
# even if you use any non empty string as a hash
|
||||
'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
|
||||
'device': 'web',
|
||||
'qv': quality,
|
||||
})
|
||||
video_url_info = self._download_json(
|
||||
'http://player.muzu.tv/player/requestVideo?%s' % data,
|
||||
video_id, 'Downloading video url')
|
||||
video_url = video_url_info['url']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'description': info['description'],
|
||||
'uploader': info['author_name'],
|
||||
}
|
||||
@@ -10,9 +10,10 @@ from ..utils import (
|
||||
|
||||
class MwaveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
|
||||
_URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s'
|
||||
_TEST = {
|
||||
'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
|
||||
'md5': 'c930e27b7720aaa3c9d0018dfc8ff6cc',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': '168859',
|
||||
'ext': 'flv',
|
||||
@@ -56,3 +57,28 @@ class MwaveIE(InfoExtractor):
|
||||
'view_count': int_or_none(vod_info.get('hit')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class MwaveMeetGreetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://mwave.interest.me/meetgreet/view/256',
|
||||
'info_dict': {
|
||||
'id': '173294',
|
||||
'ext': 'flv',
|
||||
'title': '[MEET&GREET] Park BoRam',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Mwave',
|
||||
'duration': 3634,
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
clip_id = self._html_search_regex(
|
||||
r'<iframe[^>]+src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(\d+)',
|
||||
webpage, 'clip ID')
|
||||
clip_url = MwaveIE._URL_TEMPLATE % clip_id
|
||||
return self.url_result(clip_url, 'Mwave', clip_id)
|
||||
|
||||
@@ -134,6 +134,9 @@ class NBCSportsIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'upload_date': '20150330',
|
||||
'timestamp': 1427726529,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -172,7 +175,7 @@ class CSNNEIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today)\.com/
|
||||
(?:video/.+?/(?P<id>\d+)|
|
||||
([^/]+/)*(?P<display_id>[^/?]+))
|
||||
'''
|
||||
@@ -230,6 +233,18 @@ class NBCNewsIE(ThePlatformIE):
|
||||
},
|
||||
'expected_warnings': ['http-6000 is not available']
|
||||
},
|
||||
{
|
||||
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
||||
'md5': '118d7ca3f0bea6534f119c68ef539f71',
|
||||
'info_dict': {
|
||||
'id': '669831235788',
|
||||
'ext': 'mp4',
|
||||
'title': 'See the aurora borealis from space in stunning new NASA video',
|
||||
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
||||
'upload_date': '20160420',
|
||||
'timestamp': 1461152093,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
|
||||
'only_matching': True,
|
||||
@@ -264,7 +279,10 @@ class NBCNewsIE(ThePlatformIE):
|
||||
info = bootstrap['results'][0]['video']
|
||||
else:
|
||||
player_instance_json = self._search_regex(
|
||||
r'videoObj\s*:\s*({.+})', webpage, 'player instance')
|
||||
r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None)
|
||||
if not player_instance_json:
|
||||
player_instance_json = self._html_search_regex(
|
||||
r'data-video="([^"]+)"', webpage, 'video json')
|
||||
info = self._parse_json(player_instance_json, display_id)
|
||||
video_id = info['mpxId']
|
||||
title = info['title']
|
||||
@@ -295,7 +313,7 @@ class NBCNewsIE(ThePlatformIE):
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
else:
|
||||
tbr = int_or_none(video_asset.get('bitRate'), 1000)
|
||||
tbr = int_or_none(video_asset.get('bitRate') or video_asset.get('bitrate'), 1000)
|
||||
format_id = 'http%s' % ('-%d' % tbr if tbr else '')
|
||||
video_url = update_url_query(
|
||||
video_url, {'format': 'redirect'})
|
||||
@@ -321,10 +339,9 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('description'),
|
||||
'thumbnail': info.get('thumbnail'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'timestamp': parse_iso8601(info.get('pubDate')),
|
||||
'timestamp': parse_iso8601(info.get('pubDate') or info.get('pub_date')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@@ -4,24 +4,24 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NewstubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
|
||||
'md5': '801eef0c2a9f4089fa04e4fe3533abdc',
|
||||
'info_dict': {
|
||||
'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Телеканал CNN переместил город Славянск в Крым',
|
||||
'description': 'md5:419a8c9f03442bc0b0a794d689360335',
|
||||
'duration': 31.05,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -62,7 +62,6 @@ class NewstubeIE(InfoExtractor):
|
||||
server = media_location.find(ns('./Server')).text
|
||||
app = media_location.find(ns('./App')).text
|
||||
media_id = stream_info.find(ns('./Id')).text
|
||||
quality_id = stream_info.find(ns('./QualityId')).text
|
||||
name = stream_info.find(ns('./Name')).text
|
||||
width = int(stream_info.find(ns('./Width')).text)
|
||||
height = int(stream_info.find(ns('./Height')).text)
|
||||
@@ -74,12 +73,38 @@ class NewstubeIE(InfoExtractor):
|
||||
'rtmp_conn': ['S:%s' % session_id, 'S:%s' % media_id, 'S:n2'],
|
||||
'page_url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': quality_id,
|
||||
'format_note': name,
|
||||
'format_id': 'rtmp' + ('-%s' % name if name else ''),
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
sources_data = self._download_json(
|
||||
'http://www.newstube.ru/player2/getsources?guid=%s' % video_guid,
|
||||
video_guid, fatal=False)
|
||||
if sources_data:
|
||||
for source in sources_data.get('Sources', []):
|
||||
source_url = source.get('Src')
|
||||
if not source_url:
|
||||
continue
|
||||
height = int_or_none(source.get('Height'))
|
||||
f = {
|
||||
'format_id': 'http' + ('-%dp' % height if height else ''),
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('Width')),
|
||||
'height': height,
|
||||
}
|
||||
source_type = source.get('Type')
|
||||
if source_type:
|
||||
mobj = re.search(r'codecs="([^,]+),\s*([^"]+)"', source_type)
|
||||
if mobj:
|
||||
vcodec, acodec = mobj.groups()
|
||||
f.update({
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
self._check_formats(formats, video_guid)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
||||
@@ -8,10 +8,15 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_urlparse
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
@@ -70,8 +75,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
return ret
|
||||
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com:videocenter'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -186,8 +191,8 @@ class NHLNewsIE(NHLBaseInfoExtractor):
|
||||
return self._real_extract_video(video_id)
|
||||
|
||||
|
||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com:videocenter'
|
||||
class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com:videocenter:category'
|
||||
IE_DESC = 'NHL videocenter category'
|
||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
|
||||
_TEST = {
|
||||
@@ -236,3 +241,86 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||
'id': cat_id,
|
||||
'entries': [self._extract_video(v) for v in videos],
|
||||
}
|
||||
|
||||
|
||||
class NHLIE(InfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# type=video
|
||||
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
|
||||
'md5': '0f7b9a8f986fb4b4eeeece9a56416eaf',
|
||||
'info_dict': {
|
||||
'id': '43663503',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anisimov cleans up mess',
|
||||
'description': 'md5:a02354acdfe900e940ce40706939ca63',
|
||||
'timestamp': 1461288600,
|
||||
'upload_date': '20160422',
|
||||
},
|
||||
}, {
|
||||
# type=article
|
||||
'url': 'https://www.nhl.com/news/dennis-wideman-suspended/c-278258934',
|
||||
'md5': '1f39f4ea74c1394dea110699a25b366c',
|
||||
'info_dict': {
|
||||
'id': '40784403',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wideman suspended by NHL',
|
||||
'description': 'Flames defenseman Dennis Wideman was banned 20 games for violation of Rule 40 (Physical Abuse of Officials)',
|
||||
'upload_date': '20160204',
|
||||
'timestamp': 1454544904,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
tmp_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id,
|
||||
tmp_id)
|
||||
if video_data.get('type') == 'article':
|
||||
video_data = video_data['media']
|
||||
|
||||
video_id = compat_str(video_data['id'])
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
for playback in video_data.get('playbacks', []):
|
||||
playback_url = playback.get('url')
|
||||
if not playback_url:
|
||||
continue
|
||||
ext = determine_ext(playback_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
playback_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=playback.get('name', 'hls'), fatal=False))
|
||||
else:
|
||||
height = int_or_none(playback.get('height'))
|
||||
formats.append({
|
||||
'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
|
||||
'url': playback_url,
|
||||
'width': int_or_none(playback.get('width')),
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id'))
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail_data in video_data.get('image', {}).get('cuts', {}).items():
|
||||
thumbnail_url = thumbnail_data.get('src')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail_url,
|
||||
'width': int_or_none(thumbnail_data.get('width')),
|
||||
'height': int_or_none(thumbnail_data.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'timestamp': parse_iso8601(video_data.get('date')),
|
||||
'duration': parse_duration(video_data.get('duration')),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
@@ -12,7 +13,6 @@ class NormalbootsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
|
||||
_TEST = {
|
||||
'url': 'http://normalboots.com/video/home-alone-games-jontron/',
|
||||
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
|
||||
'info_dict': {
|
||||
'id': 'home-alone-games-jontron',
|
||||
'ext': 'mp4',
|
||||
@@ -22,9 +22,10 @@ class NormalbootsIE(InfoExtractor):
|
||||
'upload_date': '20140125',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ScreenwaveMedia'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -38,16 +39,15 @@ class NormalbootsIE(InfoExtractor):
|
||||
r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||
webpage, 'date', fatal=False))
|
||||
|
||||
player_url = self._html_search_regex(
|
||||
r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"',
|
||||
webpage, 'player url')
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
|
||||
screenwavemedia_url = self._html_search_regex(
|
||||
ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL',
|
||||
group='url')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'url': screenwavemedia_url,
|
||||
'ie_key': ScreenwaveMediaIE.ie_key(),
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
|
||||
@@ -23,7 +23,7 @@ class NRKIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': 'bccd850baebefe23b56d708a113229c2',
|
||||
# MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'flv',
|
||||
@@ -34,7 +34,7 @@ class NRKIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nrk.no/video/PS*154915',
|
||||
'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
|
||||
# MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '154915',
|
||||
'ext': 'flv',
|
||||
|
||||
@@ -5,8 +5,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,7 +18,6 @@ class NuvidIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Horny babes show their awesome bodeis and',
|
||||
'duration': 129,
|
||||
'upload_date': '20140508',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
@@ -28,28 +25,31 @@ class NuvidIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
formats = []
|
||||
page_url = 'http://m.nuvid.com/video/%s' % video_id
|
||||
webpage = self._download_webpage(
|
||||
page_url, video_id, 'Downloading video page')
|
||||
# When dwnld_speed exists and has a value larger than the MP4 file's
|
||||
# bitrate, Nuvid returns the MP4 URL
|
||||
# It's unit is 100bytes/millisecond, see mobile-nuvid-min.js for the algorithm
|
||||
self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
|
||||
mp4_webpage = self._download_webpage(
|
||||
page_url, video_id, 'Downloading video page for MP4 format')
|
||||
|
||||
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
|
||||
request = sanitized_Request(
|
||||
'http://m.nuvid.com/play/%s' % video_id)
|
||||
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
|
||||
webpage = self._download_webpage(
|
||||
request, video_id, 'Downloading %s page' % format_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
|
||||
if not video_url:
|
||||
continue
|
||||
html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']',
|
||||
video_url = self._html_search_regex(html5_video_re, webpage, video_id)
|
||||
mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id)
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
}]
|
||||
if mp4_video_url != video_url:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'url': mp4_video_url,
|
||||
})
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
|
||||
title = self._html_search_regex(
|
||||
[r'<span title="([^"]+)">',
|
||||
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
|
||||
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
|
||||
r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
|
||||
thumbnails = [
|
||||
{
|
||||
'url': thumb_url,
|
||||
@@ -57,9 +57,8 @@ class NuvidIE(InfoExtractor):
|
||||
]
|
||||
thumbnail = thumbnails[0]['url'] if thumbnails else None
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
|
||||
[r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
|
||||
r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -67,7 +66,6 @@ class NuvidIE(InfoExtractor):
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -2,7 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
@@ -32,7 +36,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'skip': 'Video has been blocked',
|
||||
}, {
|
||||
# metadataUrl
|
||||
'url': 'http://ok.ru/video/63567059965189-0',
|
||||
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
|
||||
'md5': '9676cf86eff5391d35dea675d224e131',
|
||||
'info_dict': {
|
||||
'id': '63567059965189-0',
|
||||
@@ -44,6 +48,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'uploader': '☭ Андрей Мещанинов ☭',
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
'start_time': 5,
|
||||
},
|
||||
}, {
|
||||
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
|
||||
@@ -60,6 +65,22 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'uploader': 'Алина П',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
# YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
|
||||
'url': 'http://ok.ru/video/62036049272859-0',
|
||||
'info_dict': {
|
||||
'id': '62036049272859-0',
|
||||
'ext': 'mp4',
|
||||
'title': 'МУЗЫКА ДОЖДЯ .',
|
||||
'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
|
||||
'upload_date': '20120106',
|
||||
'uploader_id': '473534735899',
|
||||
'uploader': 'МARINA D',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
||||
'only_matching': True,
|
||||
@@ -78,6 +99,9 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
start_time = int_or_none(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
|
||||
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
@@ -106,7 +130,14 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
video_id, 'Downloading metadata JSON')
|
||||
|
||||
movie = metadata['movie']
|
||||
title = movie['title']
|
||||
|
||||
# Some embedded videos may not contain title in movie dict (e.g.
|
||||
# http://ok.ru/video/62036049272859-0) thus we allow missing title
|
||||
# here and it's going to be extracted later by an extractor that
|
||||
# will process the actual embed.
|
||||
provider = metadata.get('provider')
|
||||
title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
|
||||
|
||||
thumbnail = movie.get('poster')
|
||||
duration = int_or_none(movie.get('duration'))
|
||||
|
||||
@@ -135,9 +166,10 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'uploader_id': uploader_id,
|
||||
'like_count': like_count,
|
||||
'age_limit': age_limit,
|
||||
'start_time': start_time,
|
||||
}
|
||||
|
||||
if metadata.get('provider') == 'USER_YOUTUBE':
|
||||
if provider == 'USER_YOUTUBE':
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': movie['contentId'],
|
||||
|
||||
@@ -65,7 +65,7 @@ class OnionStudiosIE(InfoExtractor):
|
||||
r'share_title\s*=\s*(["\'])(?P<title>[^\1]+?)\1',
|
||||
webpage, 'title', group='title')
|
||||
description = self._search_regex(
|
||||
r'share_description\s*=\s*(["\'])(?P<description>[^\1]+?)\1',
|
||||
r'share_description\s*=\s*(["\'])(?P<description>[^\'"]+?)\1',
|
||||
webpage, 'description', default=None, group='description')
|
||||
thumbnail = self._search_regex(
|
||||
r'poster\s*=\s*(["\'])(?P<thumbnail>[^\1]+?)\1',
|
||||
|
||||
@@ -96,6 +96,8 @@ class OoyalaIE(OoyalaBaseIE):
|
||||
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
|
||||
'duration': 853.386,
|
||||
},
|
||||
# The video in the original webpage now uses PlayWire
|
||||
'skip': 'Ooyala said: movie expired',
|
||||
}, {
|
||||
# Only available for ipad
|
||||
'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
|
||||
|
||||
@@ -6,8 +6,10 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_chr
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -29,6 +31,11 @@ class OpenloadIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://openload.io/f/ZAn6oz-VZGE/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
|
||||
# for title and ext
|
||||
'url': 'https://openload.co/embed/Sxz5sADo82g/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -96,12 +103,25 @@ class OpenloadIE(InfoExtractor):
|
||||
r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>',
|
||||
webpage, 'JS code')
|
||||
|
||||
decoded = self.openload_decode(code)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'return\s+"(https?://[^"]+)"', self.openload_decode(code), 'video URL')
|
||||
r'return\s+"(https?://[^"]+)"', decoded, 'video URL')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||
'title', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
ext = mimetype2ext(self._search_regex(
|
||||
r'window\.vt\s*=\s*(["\'])(?P<mimetype>.+?)\1', decoded,
|
||||
'mimetype', default=None, group='mimetype')) or determine_ext(
|
||||
video_url, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'title': title,
|
||||
'ext': ext,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'url': video_url,
|
||||
}
|
||||
|
||||
@@ -185,6 +185,7 @@ class ORFFM4IE(InfoExtractor):
|
||||
'timestamp': 1452456073,
|
||||
'upload_date': '20160110',
|
||||
},
|
||||
'skip': 'Live streams on FM4 got deleted soon',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -196,7 +196,7 @@ class PBSIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
'md5': '173dc391afd361fa72eab5d3d918968d',
|
||||
'info_dict': {
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
@@ -204,13 +204,10 @@ class PBSIE(InfoExtractor):
|
||||
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
|
||||
'duration': 3190,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
|
||||
'md5': '143c98aa54a346738a3d78f54c925321',
|
||||
'md5': '6f722cb3c3982186d34b0f13374499c7',
|
||||
'info_dict': {
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
@@ -218,9 +215,6 @@ class PBSIE(InfoExtractor):
|
||||
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
|
||||
'duration': 5050,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
|
||||
@@ -244,9 +238,6 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
|
||||
@@ -262,9 +253,6 @@ class PBSIE(InfoExtractor):
|
||||
'upload_date': '20140122',
|
||||
'age_limit': 10,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
|
||||
@@ -290,6 +278,7 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/video/2365245528/',
|
||||
'md5': '115223d41bd55cda8ae5cd5ed4e11497',
|
||||
'info_dict': {
|
||||
'id': '2365245528',
|
||||
'display_id': '2365245528',
|
||||
@@ -299,15 +288,13 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 6851,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
||||
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
|
||||
# https://github.com/rg3/youtube-dl/issues/7059)
|
||||
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
|
||||
'md5': '84ced42850d78f1d4650297356e95e6f',
|
||||
'info_dict': {
|
||||
'id': '2365546844',
|
||||
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
||||
@@ -317,9 +304,6 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 1480,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
# Frontline video embedded via flp2012.js
|
||||
@@ -340,6 +324,7 @@ class PBSIE(InfoExtractor):
|
||||
{
|
||||
# Serves hd only via wigget/partnerplayer page
|
||||
'url': 'http://www.pbs.org/video/2365641075/',
|
||||
'md5': 'acfd4c400b48149a44861cb16dd305cf',
|
||||
'info_dict': {
|
||||
'id': '2365641075',
|
||||
'ext': 'mp4',
|
||||
@@ -348,9 +333,6 @@ class PBSIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'formats': 'mincount:8',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||
@@ -494,6 +476,7 @@ class PBSIE(InfoExtractor):
|
||||
info = video_info
|
||||
|
||||
formats = []
|
||||
http_url = None
|
||||
for num, redirect in enumerate(redirects):
|
||||
redirect_id = redirect.get('eeid')
|
||||
|
||||
@@ -514,13 +497,32 @@ class PBSIE(InfoExtractor):
|
||||
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
|
||||
format_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': redirect_id,
|
||||
})
|
||||
if re.search(r'^https?://.*(?:\d+k|baseline)', format_url):
|
||||
http_url = format_url
|
||||
self._remove_duplicate_formats(formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
formats))
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
||||
# extract only the formats that we know that they will be available as http format.
|
||||
# https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
|
||||
if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'\d+k|baseline', bitrate, http_url),
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
rating_str = info.get('rating')
|
||||
@@ -535,6 +537,19 @@ class PBSIE(InfoExtractor):
|
||||
'ext': 'ttml',
|
||||
'url': closed_captions_url,
|
||||
}]
|
||||
mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url)
|
||||
if mobj:
|
||||
ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1)
|
||||
ttml_caption_id = int(ttml_caption_id)
|
||||
subtitles['en'].extend([{
|
||||
'url': closed_captions_url.replace(
|
||||
ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)),
|
||||
'ext': 'srt',
|
||||
}, {
|
||||
'url': closed_captions_url.replace(
|
||||
ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)),
|
||||
'ext': 'vtt',
|
||||
}])
|
||||
|
||||
# info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
|
||||
# Try turning it to 'program - title' naming scheme if possible
|
||||
|
||||
32
youtube_dl/extractor/people.py
Normal file
32
youtube_dl/extractor/people.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PeopleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.people.com/people/videos/0,,20995451,00.html',
|
||||
'info_dict': {
|
||||
'id': 'ref:20995451',
|
||||
'ext': 'mp4',
|
||||
'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”',
|
||||
'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 246.318,
|
||||
'timestamp': 1458720585,
|
||||
'upload_date': '20160323',
|
||||
'uploader_id': '416418724',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(
|
||||
'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s'
|
||||
% self._match_id(url), 'BrightcoveNew')
|
||||
@@ -1,61 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class PlanetaPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?planetaplay\.com/\?sng=(?P<id>[0-9]+)'
|
||||
_API_URL = 'http://planetaplay.com/action/playlist/?sng={0:}'
|
||||
_THUMBNAIL_URL = 'http://planetaplay.com/img/thumb/{thumb:}'
|
||||
_TEST = {
|
||||
'url': 'http://planetaplay.com/?sng=3586',
|
||||
'md5': '9d569dceb7251a4e01355d5aea60f9db',
|
||||
'info_dict': {
|
||||
'id': '3586',
|
||||
'ext': 'flv',
|
||||
'title': 'md5:e829428ee28b1deed00de90de49d1da1',
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
|
||||
_SONG_FORMATS = {
|
||||
'lq': (0, 'http://www.planetaplay.com/videoplayback/{med_hash:}'),
|
||||
'hq': (1, 'http://www.planetaplay.com/videoplayback/hi/{med_hash:}'),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
response = self._download_json(
|
||||
self._API_URL.format(video_id), video_id)['response']
|
||||
try:
|
||||
data = response.get('data')[0]
|
||||
except IndexError:
|
||||
raise ExtractorError(
|
||||
'%s: failed to get the playlist' % self.IE_NAME, expected=True)
|
||||
|
||||
title = '{song_artists:} - {sng_name:}'.format(**data)
|
||||
thumbnail = self._THUMBNAIL_URL.format(**data)
|
||||
|
||||
formats = []
|
||||
for format_id, (quality, url_template) in self._SONG_FORMATS.items():
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': url_template.format(**data),
|
||||
'quality': quality,
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class QuickVidIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?quickvid\.org/watch\.php\?v=(?P<id>[a-zA-Z_0-9-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://quickvid.org/watch.php?v=sUQT3RCG8dx',
|
||||
'md5': 'c0c72dd473f260c06c808a05d19acdc5',
|
||||
'info_dict': {
|
||||
'id': 'sUQT3RCG8dx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nick Offerman\'s Summer Reading Recap',
|
||||
'thumbnail': 're:^https?://.*\.(?:png|jpg|gif)$',
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<h2>(.*?)</h2>', webpage, 'title')
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'(?s)<div id="views">(.*?)</div>',
|
||||
webpage, 'view count', fatal=False))
|
||||
video_code = self._search_regex(
|
||||
r'(?s)<video id="video"[^>]*>(.*?)</video>', webpage, 'video code')
|
||||
formats = [
|
||||
{
|
||||
'url': compat_urlparse.urljoin(url, src),
|
||||
'format_id': determine_ext(src, None),
|
||||
} for src in re.findall('<source\s+src="([^"]+)"', video_code)
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'view_count': view_count,
|
||||
}
|
||||
@@ -4,12 +4,18 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class RTBFIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rtbf\.be/(?:video/[^?]+\?.*\bid=|ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=)(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?rtbf\.be/
|
||||
(?:
|
||||
video/[^?]+\?.*\bid=|
|
||||
ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
|
||||
auvio/[^/]+\?.*id=
|
||||
)(?P<id>\d+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
|
||||
'md5': '799f334ddf2c0a582ba80c44655be570',
|
||||
@@ -17,7 +23,11 @@ class RTBFIE(InfoExtractor):
|
||||
'id': '1921274',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les Diables au coeur (épisode 2)',
|
||||
'description': 'Football - Diables Rouges',
|
||||
'duration': 3099,
|
||||
'upload_date': '20140425',
|
||||
'timestamp': 1398456336,
|
||||
'uploader': 'rtbfsport',
|
||||
}
|
||||
}, {
|
||||
# geo restricted
|
||||
@@ -26,45 +36,63 @@ class RTBFIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
|
||||
_PROVIDERS = {
|
||||
'YOUTUBE': 'Youtube',
|
||||
'DAILYMOTION': 'Dailymotion',
|
||||
'VIMEO': 'Vimeo',
|
||||
}
|
||||
_QUALITIES = [
|
||||
('mobile', 'mobile'),
|
||||
('web', 'SD'),
|
||||
('url', 'MD'),
|
||||
('mobile', 'SD'),
|
||||
('web', 'MD'),
|
||||
('high', 'HD'),
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
|
||||
error = data.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
data = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-media="([^"]+)"', webpage, 'data video')),
|
||||
video_id)
|
||||
data = data['data']
|
||||
|
||||
provider = data.get('provider')
|
||||
if provider in self._PROVIDERS:
|
||||
return self.url_result(data['url'], self._PROVIDERS[provider])
|
||||
|
||||
if data.get('provider').lower() == 'youtube':
|
||||
video_url = data.get('downloadUrl') or data.get('url')
|
||||
return self.url_result(video_url, 'Youtube')
|
||||
formats = []
|
||||
for key, format_id in self._QUALITIES:
|
||||
format_url = data['sources'].get(key)
|
||||
format_url = data.get(key + 'Url')
|
||||
if format_url:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items():
|
||||
if thumbnail_id != 'default':
|
||||
thumbnails.append({
|
||||
'url': self._IMAGE_HOST + thumbnail_url,
|
||||
'id': thumbnail_id,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': data['title'],
|
||||
'description': data.get('description') or data.get('subtitle'),
|
||||
'thumbnail': data.get('thumbnail'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': data.get('duration') or data.get('realDuration'),
|
||||
'timestamp': int_or_none(data.get('created')),
|
||||
'view_count': int_or_none(data.get('viewCount')),
|
||||
'uploader': data.get('channel'),
|
||||
'tags': data.get('tags'),
|
||||
}
|
||||
|
||||
@@ -20,18 +20,19 @@ class RtlNlIE(InfoExtractor):
|
||||
(?P<id>[0-9a-f-]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||
'md5': 'cc16baa36a6c169391f0764fa6b16654',
|
||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
|
||||
'md5': '473d1946c1fdd050b2c0161a4b13c373',
|
||||
'info_dict': {
|
||||
'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||
'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
|
||||
'ext': 'mp4',
|
||||
'title': 'RTL Nieuws - Laat',
|
||||
'description': 'md5:6b61f66510c8889923b11f2778c72dc5',
|
||||
'timestamp': 1408051800,
|
||||
'upload_date': '20140814',
|
||||
'duration': 576.880,
|
||||
'title': 'RTL Nieuws',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'timestamp': 1461951000,
|
||||
'upload_date': '20160429',
|
||||
'duration': 1167.96,
|
||||
},
|
||||
}, {
|
||||
# best format avaialble a3t
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
|
||||
'md5': 'dea7474214af1271d91ef332fb8be7ea',
|
||||
'info_dict': {
|
||||
@@ -39,18 +40,19 @@ class RtlNlIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1424039400,
|
||||
'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
|
||||
'upload_date': '20150215',
|
||||
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
|
||||
}
|
||||
}, {
|
||||
# empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
|
||||
# best format available nettv
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
|
||||
'info_dict': {
|
||||
'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
|
||||
'ext': 'mp4',
|
||||
'title': 'RTL Nieuws - Meer beelden van overval juwelier',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
|
||||
'timestamp': 1437233400,
|
||||
'upload_date': '20150718',
|
||||
'duration': 30.474,
|
||||
@@ -94,22 +96,46 @@ class RtlNlIE(InfoExtractor):
|
||||
videopath = material['videopath']
|
||||
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
|
||||
|
||||
video_urlpart = videopath.split('/adaptive/')[1][:-5]
|
||||
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
|
||||
|
||||
formats.extend([
|
||||
{
|
||||
'url': PG_URL_TEMPLATE % ('a2m', video_urlpart),
|
||||
'format_id': 'pg-sd',
|
||||
},
|
||||
{
|
||||
'url': PG_URL_TEMPLATE % ('a3m', video_urlpart),
|
||||
'format_id': 'pg-hd',
|
||||
'quality': 0,
|
||||
PG_FORMATS = (
|
||||
('a2t', 512, 288),
|
||||
('a3t', 704, 400),
|
||||
('nettv', 1280, 720),
|
||||
)
|
||||
|
||||
def pg_format(format_id, width, height):
|
||||
return {
|
||||
'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
|
||||
'format_id': 'pg-%s' % format_id,
|
||||
'protocol': 'http',
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
])
|
||||
|
||||
if not formats:
|
||||
formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
|
||||
else:
|
||||
pg_formats = []
|
||||
for format_id, width, height in PG_FORMATS:
|
||||
try:
|
||||
# Find hls format with the same width and height corresponding
|
||||
# to progressive format and copy metadata from it.
|
||||
f = next(f for f in formats if f.get('height') == height)
|
||||
# hls formats may have invalid width
|
||||
f['width'] = width
|
||||
f_copy = f.copy()
|
||||
f_copy.update(pg_format(format_id, width, height))
|
||||
pg_formats.append(f_copy)
|
||||
except StopIteration:
|
||||
# Missing hls format does mean that no progressive format with
|
||||
# such width and height exists either.
|
||||
pass
|
||||
formats.extend(pg_formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
|
||||
@@ -18,6 +18,7 @@ class SciVeeIE(InfoExtractor):
|
||||
'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
|
||||
'description': 'md5:81f1710638e11a481358fab1b11059d7',
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ScreenwaveMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
|
||||
_VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
|
||||
EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
|
||||
_TESTS = [{
|
||||
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
|
||||
|
||||
@@ -14,7 +14,6 @@ class StreetVoiceIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '94440',
|
||||
'ext': 'mp3',
|
||||
'filesize': 4167053,
|
||||
'title': '輸',
|
||||
'description': 'Crispy脆樂團 - 輸',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
@@ -32,20 +31,19 @@ class StreetVoiceIE(InfoExtractor):
|
||||
song_id = self._match_id(url)
|
||||
|
||||
song = self._download_json(
|
||||
'http://streetvoice.com/music/api/song/%s' % song_id, song_id)
|
||||
'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
|
||||
|
||||
title = song['name']
|
||||
author = song['musician']['name']
|
||||
author = song['user']['nickname']
|
||||
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song['file'],
|
||||
'filesize': song.get('size'),
|
||||
'title': title,
|
||||
'description': '%s - %s' % (author, title),
|
||||
'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
|
||||
'duration': song.get('length'),
|
||||
'upload_date': unified_strdate(song.get('created_at')),
|
||||
'uploader': author,
|
||||
'uploader_id': compat_str(song['musician']['id']),
|
||||
'uploader_id': compat_str(song['user']['id']),
|
||||
}
|
||||
|
||||
@@ -4,42 +4,178 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_filesize
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
class TagesschauPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'tagesschau:player'
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
|
||||
'md5': '8d09548d5c15debad38bee3a4d15ca21',
|
||||
'info_dict': {
|
||||
'id': '179517',
|
||||
'ext': 'mp4',
|
||||
'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
'formats': 'mincount:6',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
|
||||
'md5': '76e6eec6ebd40740671cf0a2c88617e5',
|
||||
'info_dict': {
|
||||
'id': '29417',
|
||||
'ext': 'mp3',
|
||||
'title': 'Trabi - Bye, bye Rennpappe',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
'formats': 'mincount:2',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
'xs': {'quality': 0},
|
||||
's': {'width': 320, 'height': 180, 'quality': 1},
|
||||
'm': {'width': 512, 'height': 288, 'quality': 2},
|
||||
'l': {'width': 960, 'height': 540, 'quality': 3},
|
||||
'xl': {'width': 1280, 'height': 720, 'quality': 4},
|
||||
'xxl': {'quality': 5},
|
||||
}
|
||||
|
||||
def _extract_via_api(self, kind, video_id):
|
||||
info = self._download_json(
|
||||
'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id),
|
||||
video_id)
|
||||
title = info['headline']
|
||||
formats = []
|
||||
for media in info['mediadata']:
|
||||
for format_id, format_url in media.items():
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'vcodec': 'none' if kind == 'audio' else None,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
timestamp = parse_iso8601(info.get('date'))
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# kind = mobj.group('kind').lower()
|
||||
# if kind == 'video':
|
||||
# return self._extract_via_api(kind, video_id)
|
||||
|
||||
# JSON api does not provide some audio formats (e.g. ogg) thus
|
||||
# extractiong audio via webpage
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage).strip()
|
||||
formats = []
|
||||
|
||||
for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage):
|
||||
media = self._parse_json(js_to_json(media_json), video_id, fatal=False)
|
||||
if not media:
|
||||
continue
|
||||
src = media.get('src')
|
||||
if not src:
|
||||
return
|
||||
quality = media.get('quality')
|
||||
kind = media.get('type', '').split('/')[0]
|
||||
ext = determine_ext(src)
|
||||
f = {
|
||||
'url': src,
|
||||
'format_id': '%s_%s' % (quality, ext) if quality else ext,
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if kind == 'audio' else None,
|
||||
}
|
||||
f.update(self._FORMATS.get(quality, {}))
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class TagesschauIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
|
||||
'md5': '917a228bc7df7850783bc47979673a09',
|
||||
'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
|
||||
'info_dict': {
|
||||
'id': '102143',
|
||||
'id': 'video-102143',
|
||||
'ext': 'mp4',
|
||||
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
|
||||
'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
|
||||
'description': '18.07.2015 20:10 Uhr',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
|
||||
'md5': '3c54c1f6243d279b706bde660ceec633',
|
||||
'info_dict': {
|
||||
'id': '5727',
|
||||
'id': 'ts-5727',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:695c01bfd98b7e313c501386327aea59',
|
||||
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
|
||||
'description': 'md5:695c01bfd98b7e313c501386327aea59',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
|
||||
'md5': 'aef45de271c4bf0a5db834aa40bf774c',
|
||||
# exclusive audio
|
||||
'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
|
||||
'md5': '76e6eec6ebd40740671cf0a2c88617e5',
|
||||
'info_dict': {
|
||||
'id': '18407',
|
||||
'id': 'audio-29417',
|
||||
'ext': 'mp3',
|
||||
'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
|
||||
'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
|
||||
'title': 'Trabi - Bye, bye Rennpappe',
|
||||
'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
# audio in article
|
||||
'url': 'http://www.tagesschau.de/inland/bnd-303.html',
|
||||
'md5': 'e0916c623e85fc1d2b26b78f299d3958',
|
||||
'info_dict': {
|
||||
'id': 'bnd-303',
|
||||
'ext': 'mp3',
|
||||
'title': 'Viele Baustellen für neuen BND-Chef',
|
||||
'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
|
||||
'info_dict': {
|
||||
'id': 'afd-parteitag-135',
|
||||
'title': 'Möchtegern-Underdog mit Machtanspruch',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
|
||||
'only_matching': True,
|
||||
@@ -61,88 +197,108 @@ class TagesschauIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/100sekunden/index.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# playlist article with collapsing sections
|
||||
'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
's': {'width': 256, 'height': 144, 'quality': 1},
|
||||
'm': {'width': 512, 'height': 288, 'quality': 2},
|
||||
'l': {'width': 960, 'height': 544, 'quality': 3},
|
||||
}
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
|
||||
|
||||
def _extract_formats(self, download_text, media_kind):
|
||||
links = re.finditer(
|
||||
r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
|
||||
download_text)
|
||||
formats = []
|
||||
for l in links:
|
||||
link_url = l.group('url')
|
||||
if not link_url:
|
||||
continue
|
||||
format_id = self._search_regex(
|
||||
r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
|
||||
default=determine_ext(link_url))
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
'url': l.group('url'),
|
||||
'format_name': l.group('name'),
|
||||
}
|
||||
title = l.group('title')
|
||||
if title:
|
||||
if media_kind.lower() == 'video':
|
||||
m = re.match(
|
||||
r'''(?x)
|
||||
Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
|
||||
(?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
|
||||
(?P<vbr>[0-9]+)kbps&\#10;
|
||||
Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
|
||||
Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
|
||||
title)
|
||||
if m:
|
||||
format.update({
|
||||
'format_note': m.group('audio_desc'),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
'abr': int(m.group('abr')),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'filesize_approx': parse_filesize(m.group('filesize_approx')),
|
||||
})
|
||||
else:
|
||||
m = re.match(
|
||||
r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
|
||||
title)
|
||||
if m:
|
||||
format.update({
|
||||
'format_note': '%s, %s' % (m.group('format'), m.group('note')),
|
||||
'vcodec': 'none',
|
||||
'abr': int(m.group('abr')),
|
||||
})
|
||||
formats.append(format)
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id') or mobj.group('path')
|
||||
display_id = video_id.lstrip('-')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player_url = self._html_search_meta(
|
||||
'twitter:player', webpage, 'player URL', default=None)
|
||||
if player_url:
|
||||
playerpage = self._download_webpage(
|
||||
player_url, display_id, 'Downloading player page')
|
||||
title = self._html_search_regex(
|
||||
r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
|
||||
formats = []
|
||||
for media in re.finditer(
|
||||
r'''(?x)
|
||||
(?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
|
||||
,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
|
||||
(?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
|
||||
''', playerpage):
|
||||
url = media.group('url')
|
||||
type_ = media.group('type')
|
||||
ext = media.group('ext')
|
||||
res = media.group('quality')
|
||||
f = {
|
||||
'format_id': '%s_%s' % (res, ext) if res else ext,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if type_ == 'audio' else None,
|
||||
}
|
||||
f.update(self._FORMATS.get(res, {}))
|
||||
formats.append(f)
|
||||
thumbnail = self._og_search_thumbnail(playerpage)
|
||||
title = self._og_search_title(webpage).strip()
|
||||
description = self._og_search_description(webpage).strip()
|
||||
else:
|
||||
DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
|
||||
|
||||
webpage_type = self._og_search_property('type', webpage, default=None)
|
||||
if webpage_type == 'website': # Article
|
||||
entries = []
|
||||
for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
|
||||
r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
|
||||
webpage), 1):
|
||||
entries.append({
|
||||
'id': '%s-%d' % (display_id, num),
|
||||
'title': '%s' % entry_title,
|
||||
'formats': self._extract_formats(download_text, media_kind),
|
||||
})
|
||||
if len(entries) > 1:
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
formats = entries[0]['formats']
|
||||
else: # Assume single video
|
||||
download_text = self._search_regex(
|
||||
r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
|
||||
webpage, 'download links')
|
||||
links = re.finditer(
|
||||
r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
|
||||
download_text)
|
||||
formats = []
|
||||
for l in links:
|
||||
format_id = self._search_regex(
|
||||
r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
'url': l.group('url'),
|
||||
'format_name': l.group('name'),
|
||||
}
|
||||
m = re.match(
|
||||
r'''(?x)
|
||||
Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
|
||||
(?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
|
||||
(?P<vbr>[0-9]+)kbps&\#10;
|
||||
Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
|
||||
Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
|
||||
l.group('title'))
|
||||
if m:
|
||||
format.update({
|
||||
'format_note': m.group('audio_desc'),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
'abr': int(m.group('abr')),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'filesize_approx': parse_filesize(m.group('filesize_approx')),
|
||||
})
|
||||
formats.append(format)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<p class="teasertext">(.*?)</p>',
|
||||
webpage, 'description', default=None)
|
||||
title = self._html_search_regex(
|
||||
r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
|
||||
DOWNLOAD_REGEX, webpage, 'download links', group='links')
|
||||
media_kind = self._search_regex(
|
||||
DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind')
|
||||
formats = self._extract_formats(download_text, media_kind)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<p class="teasertext">(.*?)</p>',
|
||||
webpage, 'description', default=None)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ class TEDIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
'md5': 'fc94ac279feebbce69f21c0c6ee82810',
|
||||
'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
|
||||
'info_dict': {
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
@@ -37,21 +37,26 @@ class TEDIE(InfoExtractor):
|
||||
'consciousness, but that half the time our brains are '
|
||||
'actively fooling us.'),
|
||||
'uploader': 'Dan Dennett',
|
||||
'width': 854,
|
||||
'width': 853,
|
||||
'duration': 1308,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'md5': '226f4fb9c62380d11b7995efa4c87994',
|
||||
'md5': 'b899ac15e345fb39534d913f7606082b',
|
||||
'info_dict': {
|
||||
'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'id': 'tSVI8ta_P4w',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vishal Sikka: The beauty and power of algorithms',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
|
||||
}
|
||||
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
|
||||
'upload_date': '20140122',
|
||||
'uploader_id': 'TEDInstitute',
|
||||
'uploader': 'TED Institute',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
|
||||
'md5': '71b3ab2f4233012dce09d515c9c39ce2',
|
||||
'info_dict': {
|
||||
'id': '1972',
|
||||
'ext': 'mp4',
|
||||
@@ -102,9 +107,9 @@ class TEDIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
_NATIVE_FORMATS = {
|
||||
'low': {'preference': 1, 'width': 320, 'height': 180},
|
||||
'medium': {'preference': 2, 'width': 512, 'height': 288},
|
||||
'high': {'preference': 3, 'width': 854, 'height': 480},
|
||||
'low': {'width': 320, 'height': 180},
|
||||
'medium': {'width': 512, 'height': 288},
|
||||
'high': {'width': 854, 'height': 480},
|
||||
}
|
||||
|
||||
def _extract_info(self, webpage):
|
||||
@@ -171,15 +176,21 @@ class TEDIE(InfoExtractor):
|
||||
if finfo:
|
||||
f.update(finfo)
|
||||
|
||||
http_url = None
|
||||
for format_id, resources in talk_info['resources'].items():
|
||||
if format_id == 'h264':
|
||||
for resource in resources:
|
||||
h264_url = resource.get('file')
|
||||
if not h264_url:
|
||||
continue
|
||||
bitrate = int_or_none(resource.get('bitrate'))
|
||||
formats.append({
|
||||
'url': resource['file'],
|
||||
'url': h264_url,
|
||||
'format_id': '%s-%sk' % (format_id, bitrate),
|
||||
'tbr': bitrate,
|
||||
})
|
||||
if re.search('\d+k', h264_url):
|
||||
http_url = h264_url
|
||||
elif format_id == 'rtmp':
|
||||
streamer = talk_info.get('streamer')
|
||||
if not streamer:
|
||||
@@ -195,16 +206,24 @@ class TEDIE(InfoExtractor):
|
||||
'tbr': int_or_none(resource.get('bitrate')),
|
||||
})
|
||||
elif format_id == 'hls':
|
||||
hls_formats = self._extract_m3u8_formats(
|
||||
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)
|
||||
for f in hls_formats:
|
||||
if f.get('format_id') == 'hls-meta':
|
||||
continue
|
||||
if not f.get('height'):
|
||||
f['vcodec'] = 'none'
|
||||
else:
|
||||
f['acodec'] = 'none'
|
||||
formats.extend(hls_formats)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
formats))
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
||||
if not bitrate:
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'\d+k', bitrate, http_url),
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
audio_download = talk_info.get('audioDownload')
|
||||
if audio_download:
|
||||
@@ -212,7 +231,6 @@ class TEDIE(InfoExtractor):
|
||||
'url': audio_download,
|
||||
'format_id': 'audio',
|
||||
'vcodec': 'none',
|
||||
'preference': -0.5,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
@@ -254,7 +272,11 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
config_json = self._html_search_regex(
|
||||
r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
|
||||
webpage, 'config')
|
||||
webpage, 'config', default=None)
|
||||
if not config_json:
|
||||
embed_url = self._search_regex(
|
||||
r"<iframe[^>]+class='pages-video-embed__video__object'[^>]+src='([^']+)'", webpage, 'embed url')
|
||||
return self.url_result(self._proto_relative_url(embed_url))
|
||||
config = json.loads(config_json)['config']
|
||||
video_url = config['video']['url']
|
||||
thumbnail = config.get('image', {}).get('url')
|
||||
|
||||
@@ -159,11 +159,11 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
def str_to_hex(str):
|
||||
return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
|
||||
|
||||
def hex_to_str(hex):
|
||||
return binascii.a2b_hex(hex)
|
||||
def hex_to_bytes(hex):
|
||||
return binascii.a2b_hex(hex.encode('ascii'))
|
||||
|
||||
relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
|
||||
clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
|
||||
clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
|
||||
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
|
||||
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
|
||||
return '%s&sig=%s' % (url, sig)
|
||||
@@ -269,6 +269,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
'timestamp': 1391824260,
|
||||
'duration': 467.0,
|
||||
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import codecs
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,22 +9,24 @@ from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TubiTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/video\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
|
||||
_LOGIN_URL = 'http://tubitv.com/login'
|
||||
_NETRC_MACHINE = 'tubitv'
|
||||
_TEST = {
|
||||
'url': 'http://tubitv.com/video?id=54411&title=The_Kitchen_Musical_-_EP01',
|
||||
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
|
||||
'info_dict': {
|
||||
'id': '54411',
|
||||
'id': '283829',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Kitchen Musical - EP01',
|
||||
'thumbnail': 're:^https?://.*\.png$',
|
||||
'description': 'md5:37532716166069b353e8866e71fefae7',
|
||||
'duration': 2407,
|
||||
'title': 'The Comedian at The Friday',
|
||||
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
|
||||
'uploader': 'Indie Rights Films',
|
||||
'upload_date': '20160111',
|
||||
'timestamp': 1452555979,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS download',
|
||||
@@ -55,27 +56,31 @@ class TubiTvIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
|
||||
title = video_data['n']
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
|
||||
self.raise_login_required('This video requires login')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'video:duration', webpage, 'duration'))
|
||||
|
||||
apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
|
||||
m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['mh'], video_id, 'mp4', 'm3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for sub in video_data.get('sb', []):
|
||||
sub_url = sub.get('u')
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles.setdefault(sub.get('l', 'en'), []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': video_data.get('ph'),
|
||||
'description': video_data.get('d'),
|
||||
'duration': int_or_none(video_data.get('s')),
|
||||
'timestamp': parse_iso8601(video_data.get('u')),
|
||||
'uploader': video_data.get('on'),
|
||||
}
|
||||
|
||||
@@ -58,7 +58,9 @@ class TvigleIE(InfoExtractor):
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(
|
||||
r'class="video-preview current_playing" id="(\d+)">',
|
||||
(r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
|
||||
r'var\s+cloudId\s*=\s*["\'](\d+)',
|
||||
r'class="video-preview current_playing" id="(\d+)"'),
|
||||
webpage, 'video id')
|
||||
|
||||
video_data = self._download_json(
|
||||
@@ -81,10 +83,10 @@ class TvigleIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for vcodec, fmts in item['videos'].items():
|
||||
if vcodec == 'hls':
|
||||
continue
|
||||
for format_id, video_url in fmts.items():
|
||||
if format_id == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id=vcodec))
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
|
||||
@@ -32,7 +32,22 @@ class TwentyMinutenIE(InfoExtractor):
|
||||
'title': '«Wir müssen mutig nach vorne schauen»',
|
||||
'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
|
||||
'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
|
||||
}
|
||||
},
|
||||
'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
|
||||
}, {
|
||||
# YouTube embed
|
||||
'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
|
||||
'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
|
||||
'info_dict': {
|
||||
'id': 'ivM7A7SpDOs',
|
||||
'ext': 'mp4',
|
||||
'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016',
|
||||
'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a',
|
||||
'upload_date': '20160424',
|
||||
'uploader': 'RTVCM Castilla-La Mancha',
|
||||
'uploader_id': 'RTVCM',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
|
||||
'only_matching': True,
|
||||
@@ -48,6 +63,12 @@ class TwentyMinutenIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
youtube_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
|
||||
webpage, 'YouTube embed URL', default=None)
|
||||
if youtube_url is not None:
|
||||
return self.url_result(youtube_url, 'Youtube')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>.*?<span>(.+?)</span></h1>',
|
||||
webpage, 'title', default=None)
|
||||
|
||||
@@ -49,6 +49,7 @@ class UnistraIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id)
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>UTV - (.*?)</', webpage, 'title')
|
||||
|
||||
@@ -13,8 +13,12 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class SnagFilmsEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})'
|
||||
class ViewLiftBaseIE(InfoExtractor):
|
||||
_DOMAINS_REGEX = '(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
|
||||
|
||||
|
||||
class ViewLiftEmbedIE(ViewLiftBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
|
||||
'md5': '2924e9215c6eff7a55ed35b72276bd93',
|
||||
@@ -40,7 +44,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1',
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
@@ -55,6 +59,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
|
||||
'Film %s is not playable in your area.' % video_id, expected=True)
|
||||
|
||||
formats = []
|
||||
has_bitrate = False
|
||||
for source in self._parse_json(js_to_json(self._search_regex(
|
||||
r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
|
||||
file_ = source.get('file')
|
||||
@@ -63,22 +68,25 @@ class SnagFilmsEmbedIE(InfoExtractor):
|
||||
type_ = source.get('type')
|
||||
ext = determine_ext(file_)
|
||||
format_id = source.get('label') or ext
|
||||
if all(v == 'm3u8' for v in (type_, ext)):
|
||||
if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4', m3u8_id='hls'))
|
||||
else:
|
||||
bitrate = int_or_none(self._search_regex(
|
||||
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
|
||||
file_, 'bitrate', default=None))
|
||||
if not has_bitrate and bitrate:
|
||||
has_bitrate = True
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': file_,
|
||||
'format_id': format_id,
|
||||
'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
|
||||
'tbr': bitrate,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
|
||||
self._sort_formats(formats, field_preference)
|
||||
|
||||
title = self._search_regex(
|
||||
[r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
|
||||
@@ -91,8 +99,8 @@ class SnagFilmsEmbedIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class SnagFilmsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)'
|
||||
class ViewLiftIE(ViewLiftBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'http://www.snagfilms.com/films/title/lost_for_life',
|
||||
'md5': '19844f897b35af219773fd63bdec2942',
|
||||
@@ -127,10 +135,16 @@ class SnagFilmsIE(InfoExtractor):
|
||||
# Film is not available.
|
||||
'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.winnersview.com/videos/the-good-son',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kesari.tv/news/video/1461919076414',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
domain, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
@@ -170,7 +184,7 @@ class SnagFilmsIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id,
|
||||
'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
|
||||
'id': film_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
@@ -178,4 +192,5 @@ class SnagFilmsIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'categories': categories,
|
||||
'ie_key': 'ViewLiftEmbed',
|
||||
}
|
||||
@@ -1,10 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -14,6 +15,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
HEADRequest,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@@ -75,11 +77,11 @@ class ViewsterIE(InfoExtractor):
|
||||
|
||||
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
|
||||
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('Accept', self._ACCEPT_HEADER)
|
||||
request.add_header('Auth-token', self._AUTH_TOKEN)
|
||||
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)
|
||||
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -114,43 +116,85 @@ class ViewsterIE(InfoExtractor):
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
formats = []
|
||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
|
||||
media = self._download_json(
|
||||
'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
|
||||
% (entry_id, compat_urllib_parse.quote(media_type)),
|
||||
video_id, 'Downloading %s JSON' % media_type, fatal=False)
|
||||
if not media:
|
||||
continue
|
||||
video_url = media.get('Uri')
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
video_url += '&' if '?' in video_url else '?'
|
||||
video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds'))
|
||||
elif ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls',
|
||||
fatal=False) # m3u8 sometimes fail
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
format_id = media.get('Bitrate')
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': 'mp4-%s' % format_id,
|
||||
'height': int_or_none(media.get('Height')),
|
||||
'width': int_or_none(media.get('Width')),
|
||||
'preference': 1,
|
||||
}
|
||||
if format_id and not f['height']:
|
||||
f['height'] = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
formats.append(f)
|
||||
for language_set in info.get('LanguageSets', []):
|
||||
manifest_url = None
|
||||
m3u8_formats = []
|
||||
audio = language_set.get('Audio') or ''
|
||||
subtitle = language_set.get('Subtitle') or ''
|
||||
base_format_id = audio
|
||||
if subtitle:
|
||||
base_format_id += '-%s' % subtitle
|
||||
|
||||
if not formats and not info.get('LanguageSets') and not info.get('VODSettings'):
|
||||
def concat(suffix, sep='-'):
|
||||
return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
|
||||
|
||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
|
||||
media = self._download_json(
|
||||
'https://public-api.viewster.com/movies/%s/video' % entry_id,
|
||||
video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
|
||||
'mediaType': media_type,
|
||||
'language': audio,
|
||||
'subtitle': subtitle,
|
||||
})
|
||||
if not media:
|
||||
continue
|
||||
video_url = media.get('Uri')
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
manifest_url = video_url
|
||||
video_url += '&' if '?' in video_url else '?'
|
||||
video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id=concat('hds')))
|
||||
elif ext == 'm3u8':
|
||||
manifest_url = video_url
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id=concat('hls'),
|
||||
fatal=False) # m3u8 sometimes fail
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
qualities_basename = self._search_regex(
|
||||
'/([^/]+)\.csmil/',
|
||||
manifest_url, 'qualities basename', default=None)
|
||||
if not qualities_basename:
|
||||
continue
|
||||
QUALITIES_RE = r'((,\d+k)+,?)'
|
||||
qualities = self._search_regex(
|
||||
QUALITIES_RE, qualities_basename,
|
||||
'qualities', default=None)
|
||||
if not qualities:
|
||||
continue
|
||||
qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
|
||||
qualities.sort()
|
||||
http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
|
||||
http_url_basename = url_basename(video_url)
|
||||
if m3u8_formats:
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
if len(qualities) == len(m3u8_formats):
|
||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': video_url.replace(http_url_basename, http_template % q),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
for q in qualities:
|
||||
formats.append({
|
||||
'url': video_url.replace(http_url_basename, http_template % q),
|
||||
'ext': 'mp4',
|
||||
'format_id': 'http-%d' % q,
|
||||
'tbr': q,
|
||||
})
|
||||
|
||||
if not formats and not info.get('VODSettings'):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -282,10 +282,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
pass_url = url + '/check-password'
|
||||
password_request = sanitized_Request(pass_url, data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Referer', url)
|
||||
return self._download_json(
|
||||
password_request, video_id,
|
||||
'Verifying the password',
|
||||
'Wrong password')
|
||||
'Verifying the password', 'Wrong password')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
@@ -31,16 +35,76 @@ class VLiveIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
|
||||
long_video_id = self._search_regex(
|
||||
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"',
|
||||
webpage, 'long video id')
|
||||
# UTC+x - UTC+9 (KST)
|
||||
tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone
|
||||
tz_offset = -tz // 60 - 9 * 60
|
||||
self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset)
|
||||
|
||||
key = self._search_regex(
|
||||
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"',
|
||||
webpage, 'key')
|
||||
status_params = self._download_json(
|
||||
'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
|
||||
video_id, 'Downloading JSON status',
|
||||
headers={'Referer': url.encode('utf-8')})
|
||||
status = status_params.get('status')
|
||||
air_start = status_params.get('onAirStartAt', '')
|
||||
is_live = status_params.get('isLive')
|
||||
|
||||
video_params = self._search_regex(
|
||||
r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)',
|
||||
webpage, 'video params')
|
||||
live_params, long_video_id, key = re.split(
|
||||
r'"\s*,\s*"', video_params)[1:4]
|
||||
|
||||
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
|
||||
live_params = self._parse_json('"%s"' % live_params, video_id)
|
||||
live_params = self._parse_json(live_params, video_id)
|
||||
return self._live(video_id, webpage, live_params)
|
||||
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
|
||||
if long_video_id and key:
|
||||
return self._replay(video_id, webpage, long_video_id, key)
|
||||
elif is_live:
|
||||
status = 'LIVE_END'
|
||||
else:
|
||||
status = 'COMING_SOON'
|
||||
|
||||
if status == 'LIVE_END':
|
||||
raise ExtractorError('Uploading for replay. Please wait...',
|
||||
expected=True)
|
||||
elif status == 'COMING_SOON':
|
||||
raise ExtractorError('Coming soon! %s' % air_start, expected=True)
|
||||
elif status == 'CANCELED':
|
||||
raise ExtractorError('We are sorry, '
|
||||
'but the live broadcast has been canceled.',
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unknown status %s' % status)
|
||||
|
||||
def _get_common_fields(self, webpage):
|
||||
title = self._og_search_title(webpage)
|
||||
creator = self._html_search_regex(
|
||||
r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
|
||||
webpage, 'creator', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
return {
|
||||
'title': title,
|
||||
'creator': creator,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
def _live(self, video_id, webpage, live_params):
|
||||
formats = []
|
||||
for vid in live_params.get('resolutions', []):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
vid['cdnUrl'], video_id, 'mp4',
|
||||
m3u8_id=vid.get('name'),
|
||||
fatal=False, live=True))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return dict(self._get_common_fields(webpage),
|
||||
id=video_id,
|
||||
formats=formats,
|
||||
is_live=True)
|
||||
|
||||
def _replay(self, video_id, webpage, long_video_id, key):
|
||||
playinfo = self._download_json(
|
||||
'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
|
||||
% compat_urllib_parse_urlencode({
|
||||
@@ -62,11 +126,6 @@ class VLiveIE(InfoExtractor):
|
||||
} for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
creator = self._html_search_regex(
|
||||
r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
|
||||
webpage, 'creator', fatal=False)
|
||||
|
||||
view_count = int_or_none(playinfo.get('meta', {}).get('count'))
|
||||
|
||||
subtitles = {}
|
||||
@@ -77,12 +136,8 @@ class VLiveIE(InfoExtractor):
|
||||
'ext': 'vtt',
|
||||
'url': caption['source']}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'creator': creator,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
return dict(self._get_common_fields(webpage),
|
||||
id=video_id,
|
||||
formats=formats,
|
||||
view_count=view_count,
|
||||
subtitles=subtitles)
|
||||
|
||||
@@ -11,61 +11,27 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class SexyKarmaIE(InfoExtractor):
|
||||
IE_DESC = 'Sexy Karma and Watch Indian Porn'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
|
||||
'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
|
||||
class WatchIndianPornIE(InfoExtractor):
|
||||
IE_DESC = 'Watch Indian Porn'
|
||||
_VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html',
|
||||
'md5': '249589a164dde236ec65832bfce17440',
|
||||
'info_dict': {
|
||||
'id': 'yHI70cOyIHt',
|
||||
'display_id': 'taking-a-quick-pee',
|
||||
'id': 'RZa2avywNPa',
|
||||
'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
|
||||
'ext': 'mp4',
|
||||
'title': 'Taking a quick pee.',
|
||||
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'wildginger7',
|
||||
'upload_date': '20141008',
|
||||
'duration': 22,
|
||||
'uploader': 'LoveJay',
|
||||
'upload_date': '20160428',
|
||||
'duration': 226,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
|
||||
'md5': 'dd216c68d29b49b12842b9babe762a5d',
|
||||
'info_dict': {
|
||||
'id': '8Id6EZPbuHf',
|
||||
'display_id': 'pot-pixie-tribute',
|
||||
'ext': 'mp4',
|
||||
'title': 'pot_pixie tribute',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'banffite',
|
||||
'upload_date': '20141013',
|
||||
'duration': 16,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
|
||||
'md5': '9afb80675550406ed9a63ac2819ef69d',
|
||||
'info_dict': {
|
||||
'id': 'dW2mtctxJfs',
|
||||
'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number',
|
||||
'ext': 'mp4',
|
||||
'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Don',
|
||||
'upload_date': '20140213',
|
||||
'duration': 83,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -109,6 +75,9 @@ class SexyKarmaIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'http_headers': {
|
||||
'Referer': url,
|
||||
},
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
@@ -4,16 +4,22 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class WSJIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
|
||||
(?:www\.)?wsj\.com/video/[^/]+/
|
||||
)
|
||||
(?P<id>[a-zA-Z0-9-]+)'''
|
||||
IE_DESC = 'Wall Street Journal'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||
'md5': '9747d7a6ebc2f4df64b981e1dde9efa9',
|
||||
'md5': 'e230a5bb249075e40793b655a54a02e4',
|
||||
'info_dict': {
|
||||
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||
'ext': 'mp4',
|
||||
@@ -24,65 +30,60 @@ class WSJIE(InfoExtractor):
|
||||
'duration': 90,
|
||||
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
bitrates = [128, 174, 264, 320, 464, 664, 1264]
|
||||
api_url = (
|
||||
'http://video-api.wsj.com/api-video/find_all_videos.asp?'
|
||||
'type=guid&count=1&query=%s&'
|
||||
'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,'
|
||||
'author,description,name,linkURL,videoStillURL,duration,videoURL,'
|
||||
'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,'
|
||||
'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,'
|
||||
'allthingsd-subsection,sm-section,sm-subsection,provider,'
|
||||
'formattedCreationDate,keywords,keywordsOmniture,column,editor,'
|
||||
'emailURL,emailPartnerID,showName,omnitureProgramName,'
|
||||
'omnitureVideoFormat,linkRelativeURL,touchCastID,'
|
||||
'omniturePublishDate,%s') % (
|
||||
video_id, ','.join('video%dkMP4Url' % br for br in bitrates))
|
||||
'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
|
||||
'thumbnailList,author,description,name,duration,videoURL,'
|
||||
'titletag,formattedCreationDate,keywords,editor' % video_id)
|
||||
info = self._download_json(api_url, video_id)['items'][0]
|
||||
|
||||
# Thumbnails are conveniently in the correct format already
|
||||
thumbnails = info.get('thumbnailList')
|
||||
creator = info.get('author')
|
||||
uploader_id = info.get('editor')
|
||||
categories = info.get('keywords')
|
||||
duration = int_or_none(info.get('duration'))
|
||||
upload_date = unified_strdate(
|
||||
info.get('formattedCreationDate'), day_first=False)
|
||||
title = info.get('name', info.get('titletag'))
|
||||
|
||||
formats = [{
|
||||
'format_id': 'f4m',
|
||||
'format_note': 'f4m (meta URL)',
|
||||
'url': info['videoURL'],
|
||||
}]
|
||||
if info.get('hls'):
|
||||
formats = []
|
||||
|
||||
f4m_url = info.get('videoURL')
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
|
||||
m3u8_url = info.get('hls')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
info['hls'], video_id, ext='mp4',
|
||||
preference=0, entry_protocol='m3u8_native'))
|
||||
for br in bitrates:
|
||||
field = 'video%dkMP4Url' % br
|
||||
if info.get(field):
|
||||
formats.append({
|
||||
'format_id': 'mp4-%d' % br,
|
||||
'container': 'mp4',
|
||||
'tbr': br,
|
||||
'url': info[field],
|
||||
})
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
for v in info.get('videoMP4List', []):
|
||||
mp4_url = v.get('url')
|
||||
if not mp4_url:
|
||||
continue
|
||||
tbr = int_or_none(v.get('bitrate'))
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
'format_id': 'http' + ('-%d' % tbr if tbr else ''),
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(v.get('width')),
|
||||
'height': int_or_none(v.get('height')),
|
||||
'fps': float_or_none(v.get('fps')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'creator': creator,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
# Thumbnails are conveniently in the correct format already
|
||||
'thumbnails': info.get('thumbnailList'),
|
||||
'creator': info.get('author'),
|
||||
'uploader_id': info.get('editor'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'upload_date': unified_strdate(info.get(
|
||||
'formattedCreationDate'), day_first=False),
|
||||
'title': title,
|
||||
'categories': categories,
|
||||
'categories': info.get('keywords'),
|
||||
}
|
||||
|
||||
158
youtube_dl/extractor/xiami.py
Normal file
158
youtube_dl/extractor/xiami.py
Normal file
@@ -0,0 +1,158 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class XiamiBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
|
||||
|
||||
def _extract_track(self, track, track_id=None):
|
||||
title = track['title']
|
||||
track_url = self._decrypt(track['location'])
|
||||
|
||||
subtitles = {}
|
||||
lyrics_url = track.get('lyric_url') or track.get('lyric')
|
||||
if lyrics_url and lyrics_url.startswith('http'):
|
||||
subtitles['origin'] = [{'url': lyrics_url}]
|
||||
|
||||
return {
|
||||
'id': track.get('song_id') or track_id,
|
||||
'url': track_url,
|
||||
'title': title,
|
||||
'thumbnail': track.get('pic') or track.get('album_pic'),
|
||||
'duration': int_or_none(track.get('length')),
|
||||
'creator': track.get('artist', '').split(';')[0],
|
||||
'track': title,
|
||||
'album': track.get('album_name'),
|
||||
'artist': track.get('artist'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_tracks(self, item_id, typ=None):
|
||||
playlist = self._download_json(
|
||||
'%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id)
|
||||
return [
|
||||
self._extract_track(track, item_id)
|
||||
for track in playlist['data']['trackList']]
|
||||
|
||||
@staticmethod
|
||||
def _decrypt(origin):
|
||||
n = int(origin[0])
|
||||
origin = origin[1:]
|
||||
short_lenth = len(origin) // n
|
||||
long_num = len(origin) - short_lenth * n
|
||||
l = tuple()
|
||||
for i in range(0, n):
|
||||
length = short_lenth
|
||||
if i < long_num:
|
||||
length += 1
|
||||
l += (origin[0:length], )
|
||||
origin = origin[length:]
|
||||
ans = ''
|
||||
for i in range(0, short_lenth + 1):
|
||||
for j in range(0, n):
|
||||
if len(l[j]) > i:
|
||||
ans += l[j][i]
|
||||
return compat_urllib_parse_unquote(ans).replace('^', '0')
|
||||
|
||||
|
||||
class XiamiSongIE(XiamiBaseIE):
|
||||
IE_NAME = 'xiami:song'
|
||||
IE_DESC = '虾米音乐'
|
||||
_VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.xiami.com/song/1775610518',
|
||||
'md5': '521dd6bea40fd5c9c69f913c232cb57e',
|
||||
'info_dict': {
|
||||
'id': '1775610518',
|
||||
'ext': 'mp3',
|
||||
'title': 'Woman',
|
||||
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
||||
'duration': 265,
|
||||
'creator': 'HONNE',
|
||||
'track': 'Woman',
|
||||
'album': 'Woman',
|
||||
'artist': 'HONNE',
|
||||
'subtitles': {
|
||||
'origin': [{
|
||||
'ext': 'lrc',
|
||||
}],
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.xiami.com/song/1775256504',
|
||||
'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
|
||||
'info_dict': {
|
||||
'id': '1775256504',
|
||||
'ext': 'mp3',
|
||||
'title': '悟空',
|
||||
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
||||
'duration': 200,
|
||||
'creator': '戴荃',
|
||||
'track': '悟空',
|
||||
'album': '悟空',
|
||||
'artist': '戴荃',
|
||||
'subtitles': {
|
||||
'origin': [{
|
||||
'ext': 'lrc',
|
||||
}],
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_tracks(self._match_id(url))[0]
|
||||
|
||||
|
||||
class XiamiPlaylistBaseIE(XiamiBaseIE):
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id)
|
||||
|
||||
|
||||
class XiamiAlbumIE(XiamiPlaylistBaseIE):
|
||||
IE_NAME = 'xiami:album'
|
||||
IE_DESC = '虾米音乐 - 专辑'
|
||||
_VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)'
|
||||
_TYPE = '1'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.xiami.com/album/2100300444',
|
||||
'info_dict': {
|
||||
'id': '2100300444',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class XiamiArtistIE(XiamiPlaylistBaseIE):
|
||||
IE_NAME = 'xiami:artist'
|
||||
IE_DESC = '虾米音乐 - 歌手'
|
||||
_VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)'
|
||||
_TYPE = '2'
|
||||
_TEST = {
|
||||
'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
|
||||
'info_dict': {
|
||||
'id': '2132',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
}
|
||||
|
||||
|
||||
class XiamiCollectionIE(XiamiPlaylistBaseIE):
|
||||
IE_NAME = 'xiami:collection'
|
||||
IE_DESC = '虾米音乐 - 精选集'
|
||||
_VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)'
|
||||
_TYPE = '3'
|
||||
_TEST = {
|
||||
'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
|
||||
'info_dict': {
|
||||
'id': '156527391',
|
||||
},
|
||||
'playlist_mincount': 29,
|
||||
}
|
||||
@@ -2,15 +2,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ class XMinusIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '4542',
|
||||
'ext': 'mp3',
|
||||
'title': 'Леонид Агутин-Песенка шофера',
|
||||
'title': 'Леонид Агутин-Песенка шофёра',
|
||||
'duration': 156,
|
||||
'tbr': 320,
|
||||
'filesize_approx': 5900000,
|
||||
@@ -36,38 +36,41 @@ class XMinusIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
artist = self._html_search_regex(
|
||||
r'minus_track\.artist="(.+?)"', webpage, 'artist')
|
||||
r'<a[^>]+href="/artist/\d+">([^<]+)</a>', webpage, 'artist')
|
||||
title = artist + '-' + self._html_search_regex(
|
||||
r'minus_track\.title="(.+?)"', webpage, 'title')
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
r'minus_track\.dur_sec=\'([0-9]*?)\'',
|
||||
r'<span[^>]+class="minustrack-full-title(?:\s+[^"]+)?"[^>]*>([^<]+)', webpage, 'title')
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<span[^>]+class="player-duration(?:\s+[^"]+)?"[^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
filesize_approx = parse_filesize(self._html_search_regex(
|
||||
r'<div id="finfo"[^>]*>\s*↓\s*([0-9.]+\s*[a-zA-Z][bB])',
|
||||
webpage, 'approximate filesize', fatal=False))
|
||||
tbr = int_or_none(self._html_search_regex(
|
||||
r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
|
||||
webpage, 'bitrate', fatal=False))
|
||||
mobj = re.search(
|
||||
r'<div[^>]+class="dw-info(?:\s+[^"]+)?"[^>]*>(?P<tbr>\d+)\s*кбит/c\s+(?P<filesize>[0-9.]+)\s*мб</div>',
|
||||
webpage)
|
||||
tbr = filesize_approx = None
|
||||
if mobj:
|
||||
filesize_approx = float(mobj.group('filesize')) * 1000000
|
||||
tbr = float(mobj.group('tbr'))
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'<div class="quality.*?► ([0-9]+)',
|
||||
r'<span><[^>]+class="icon-chart-bar".*?>(\d+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div id="song_texts">(.*?)</div><br',
|
||||
r'(?s)<pre[^>]+id="lyrics-original"[^>]*>(.*?)</pre>',
|
||||
webpage, 'song lyrics', fatal=False)
|
||||
if description:
|
||||
description = re.sub(' *\r *', '\n', description)
|
||||
|
||||
enc_token = self._html_search_regex(
|
||||
r'minus_track\.s?tkn="(.+?)"', webpage, 'enc_token')
|
||||
token = ''.join(
|
||||
c if pos == 3 else compat_chr(compat_ord(c) - 1)
|
||||
for pos, c in enumerate(reversed(enc_token)))
|
||||
video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token)
|
||||
k = self._search_regex(
|
||||
r'<div[^>]+id="player-bottom"[^>]+data-k="([^"]+)">', webpage,
|
||||
'encoded data')
|
||||
h = time.time() / 3600
|
||||
a = sum(map(int, [compat_ord(c) for c in k])) + int(video_id) + h
|
||||
video_url = 'http://x-minus.me/dl/minus?id=%s&tkn2=%df%d' % (video_id, a, h)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
# The extension is unknown until actual downloading
|
||||
'ext': 'mp3',
|
||||
'duration': duration,
|
||||
'filesize_approx': filesize_approx,
|
||||
'tbr': tbr,
|
||||
|
||||
@@ -24,7 +24,7 @@ from .nbc import NBCSportsVPlayerIE
|
||||
|
||||
class YahooIE(InfoExtractor):
|
||||
IE_DESC = 'Yahoo screen and movies'
|
||||
_VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
|
||||
_VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||
@@ -38,7 +38,7 @@ class YahooIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
|
||||
'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
|
||||
'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23',
|
||||
'info_dict': {
|
||||
'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
|
||||
'ext': 'mp4',
|
||||
@@ -49,7 +49,7 @@ class YahooIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
|
||||
'md5': '60e8ac193d8fb71997caa8fce54c6460',
|
||||
'md5': '75ffabdb87c16d4ffe8c036dc4d1c136',
|
||||
'info_dict': {
|
||||
'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
|
||||
'ext': 'mp4',
|
||||
@@ -59,15 +59,15 @@ class YahooIE(InfoExtractor):
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://tw.screen.yahoo.com/election-2014-askmayor/敢問市長-黃秀霜批賴清德-非常高傲-033009720.html',
|
||||
'md5': '3a09cf59349cfaddae1797acc3c087fc',
|
||||
'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html',
|
||||
'md5': '9035d38f88b1782682a3e89f985be5bb',
|
||||
'info_dict': {
|
||||
'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
|
||||
'ext': 'mp4',
|
||||
'title': '敢問市長/黃秀霜批賴清德「非常高傲」',
|
||||
'description': '直言台南沒捷運 交通居五都之末',
|
||||
'duration': 396,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
|
||||
@@ -89,17 +89,32 @@ class YahooIE(InfoExtractor):
|
||||
'title': 'Program that makes hockey more affordable not offered in Manitoba',
|
||||
'description': 'md5:c54a609f4c078d92b74ffb9bf1f496f4',
|
||||
'duration': 121,
|
||||
}
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html',
|
||||
'md5': '226a895aae7e21b0129e2a2006fe9690',
|
||||
'info_dict': {
|
||||
'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
|
||||
'ext': 'mp4',
|
||||
'title': '\'The Interview\' TV Spot: War',
|
||||
'description': 'The Interview',
|
||||
'duration': 30,
|
||||
}
|
||||
'id': '154609075',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'f8e336c6b66f503282e5f719641d6565',
|
||||
'info_dict': {
|
||||
'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
|
||||
'ext': 'mp4',
|
||||
'title': '\'The Interview\' TV Spot: War',
|
||||
'description': 'The Interview',
|
||||
'duration': 30,
|
||||
},
|
||||
}, {
|
||||
'md5': '958bcb90b4d6df71c56312137ee1cd5a',
|
||||
'info_dict': {
|
||||
'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',
|
||||
'ext': 'mp4',
|
||||
'title': '\'The Interview\' TV Spot: Guys',
|
||||
'description': 'The Interview',
|
||||
'duration': 30,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
||||
'md5': '88e209b417f173d86186bef6e4d1f160',
|
||||
@@ -119,10 +134,11 @@ class YahooIE(InfoExtractor):
|
||||
'title': 'Connect the Dots: Dark Side of Virgo',
|
||||
'description': 'md5:1428185051cfd1949807ad4ff6d3686a',
|
||||
'duration': 201,
|
||||
}
|
||||
},
|
||||
'skip': 'Domain name in.lifestyle.yahoo.com gone',
|
||||
}, {
|
||||
'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
|
||||
'md5': '989396ae73d20c6f057746fb226aa215',
|
||||
'md5': 'b17ac378b1134fa44370fb27db09a744',
|
||||
'info_dict': {
|
||||
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
|
||||
'ext': 'mp4',
|
||||
@@ -141,6 +157,9 @@ class YahooIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
||||
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
||||
'upload_date': '20150313',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'timestamp': 1426270238,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://tw.news.yahoo.com/-100120367.html',
|
||||
@@ -148,7 +167,7 @@ class YahooIE(InfoExtractor):
|
||||
}, {
|
||||
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
|
||||
'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
|
||||
'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
|
||||
'md5': '1ddbf7c850777548438e5c4f147c7b8c',
|
||||
'info_dict': {
|
||||
'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
|
||||
'ext': 'mp4',
|
||||
@@ -166,6 +185,17 @@ class YahooIE(InfoExtractor):
|
||||
'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.',
|
||||
},
|
||||
},
|
||||
{
|
||||
# config['models']['applet_model']['data']['sapi'] has no query
|
||||
'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016',
|
||||
'md5': 'dac0c72d502bc5facda80c9e6d5c98db',
|
||||
'info_dict': {
|
||||
'id': 'a6015640-e9e5-3efb-bb60-05589a183919',
|
||||
'ext': 'mp4',
|
||||
'description': 'Galactic',
|
||||
'title': 'Dolla Diva (feat. Maggie Koerner)',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -174,19 +204,26 @@ class YahooIE(InfoExtractor):
|
||||
page_id = mobj.group('id')
|
||||
url = mobj.group('url')
|
||||
host = mobj.group('host')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
if 'err=404' in urlh.geturl():
|
||||
raise ExtractorError('Video gone', expected=True)
|
||||
|
||||
# Look for iframed media first
|
||||
iframe_m = re.search(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
|
||||
if iframe_m:
|
||||
entries = []
|
||||
iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
|
||||
for idx, iframe_url in enumerate(iframe_urls):
|
||||
iframepage = self._download_webpage(
|
||||
host + iframe_m.group(1), display_id, 'Downloading iframe webpage')
|
||||
host + iframe_url, display_id,
|
||||
note='Downloading iframe webpage for video #%d' % idx)
|
||||
items_json = self._search_regex(
|
||||
r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None)
|
||||
if items_json:
|
||||
items = json.loads(items_json)
|
||||
video_id = items[0]['id']
|
||||
return self._get_info(video_id, display_id, webpage)
|
||||
entries.append(self._get_info(video_id, display_id, webpage))
|
||||
if entries:
|
||||
return self.playlist_result(entries, page_id)
|
||||
|
||||
# Look for NBCSports iframes
|
||||
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
|
||||
if nbc_sports_url:
|
||||
@@ -202,7 +239,7 @@ class YahooIE(InfoExtractor):
|
||||
config = self._parse_json(config_json, display_id, fatal=False)
|
||||
if config:
|
||||
sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
|
||||
if sapi:
|
||||
if sapi and 'query' in sapi:
|
||||
return self._extract_info(display_id, sapi, webpage)
|
||||
|
||||
items_json = self._search_regex(
|
||||
|
||||
@@ -18,9 +18,23 @@ from ..utils import (
|
||||
class YandexMusicBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _handle_error(response):
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
if isinstance(response, dict):
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
def _download_webpage(self, *args, **kwargs):
|
||||
webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
|
||||
if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
|
||||
raise ExtractorError(
|
||||
'YandexMusic has considered youtube-dl requests automated and '
|
||||
'asks you to solve a CAPTCHA. You can either wait for some '
|
||||
'time until unblocked and optionally use --sleep-interval '
|
||||
'in future or alternatively you can go to https://music.yandex.ru/ '
|
||||
'solve CAPTCHA, then export cookies and pass cookie file to '
|
||||
'youtube-dl with --cookies',
|
||||
expected=True)
|
||||
return webpage
|
||||
|
||||
def _download_json(self, *args, **kwargs):
|
||||
response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
|
||||
@@ -47,7 +61,8 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
'album_artist': 'Carlo Ambrosio',
|
||||
'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
|
||||
'release_year': '2009',
|
||||
}
|
||||
},
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}
|
||||
|
||||
def _get_track_url(self, storage_dir, track_id):
|
||||
@@ -139,6 +154,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
|
||||
'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -171,6 +187,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
|
||||
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}, {
|
||||
# playlist exceeding the limit of 150 tracks shipped with webpage (see
|
||||
# https://github.com/rg3/youtube-dl/issues/6666)
|
||||
@@ -180,6 +197,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
|
||||
'title': 'Музыка 90-х',
|
||||
},
|
||||
'playlist_count': 310,
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -64,6 +64,14 @@ class YoukuIE(InfoExtractor):
|
||||
'params': {
|
||||
'videopassword': '100600',
|
||||
},
|
||||
}, {
|
||||
# /play/get.json contains streams with "channel_type":"tail"
|
||||
'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
|
||||
'info_dict': {
|
||||
'id': 'XOTUxMzg4NDMy',
|
||||
'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}]
|
||||
|
||||
def construct_video_urls(self, data):
|
||||
@@ -92,6 +100,8 @@ class YoukuIE(InfoExtractor):
|
||||
|
||||
fileid_dict = {}
|
||||
for stream in data['stream']:
|
||||
if stream.get('channel_type') == 'tail':
|
||||
continue
|
||||
format = stream.get('stream_type')
|
||||
fileid = stream['stream_fileid']
|
||||
fileid_dict[format] = fileid
|
||||
@@ -117,6 +127,8 @@ class YoukuIE(InfoExtractor):
|
||||
# generate video_urls
|
||||
video_urls_dict = {}
|
||||
for stream in data['stream']:
|
||||
if stream.get('channel_type') == 'tail':
|
||||
continue
|
||||
format = stream.get('stream_type')
|
||||
video_urls = []
|
||||
for dt in stream['segs']:
|
||||
@@ -253,6 +265,8 @@ class YoukuIE(InfoExtractor):
|
||||
# which one has all
|
||||
} for i in range(max(len(v.get('segs')) for v in data['stream']))]
|
||||
for stream in data['stream']:
|
||||
if stream.get('channel_type') == 'tail':
|
||||
continue
|
||||
fm = stream.get('stream_type')
|
||||
video_urls = video_urls_dict[fm]
|
||||
for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
|
||||
|
||||
@@ -125,6 +125,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
if login_results is False:
|
||||
return False
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
|
||||
login_results, 'error message', default=None)
|
||||
if error_msg:
|
||||
raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
|
||||
|
||||
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
|
||||
raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
|
||||
|
||||
@@ -2133,10 +2139,11 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
|
||||
|
||||
|
||||
class YoutubeSearchURLIE(InfoExtractor):
|
||||
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com search URLs'
|
||||
IE_NAME = 'youtube:search_url'
|
||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
|
||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
|
||||
'playlist_mincount': 5,
|
||||
@@ -2151,32 +2158,8 @@ class YoutubeSearchURLIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
query = compat_urllib_parse_unquote_plus(mobj.group('query'))
|
||||
|
||||
webpage = self._download_webpage(url, query)
|
||||
result_code = self._search_regex(
|
||||
r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
|
||||
|
||||
part_codes = re.findall(
|
||||
r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
|
||||
entries = []
|
||||
for part_code in part_codes:
|
||||
part_title = self._html_search_regex(
|
||||
[r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
|
||||
part_url_snippet = self._html_search_regex(
|
||||
r'(?s)href="([^"]+)"', part_code, 'item URL')
|
||||
part_url = compat_urlparse.urljoin(
|
||||
'https://www.youtube.com/', part_url_snippet)
|
||||
entries.append({
|
||||
'_type': 'url',
|
||||
'url': part_url,
|
||||
'title': part_title,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'title': query,
|
||||
}
|
||||
return self.playlist_result(self._process_page(webpage), playlist_title=query)
|
||||
|
||||
|
||||
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
|
||||
|
||||
@@ -425,8 +425,12 @@ def parseOpts(overrideArguments=None):
|
||||
help='Set file xattribute ytdl.filesize with expected filesize (experimental)')
|
||||
downloader.add_option(
|
||||
'--hls-prefer-native',
|
||||
dest='hls_prefer_native', action='store_true',
|
||||
help='Use the native HLS downloader instead of ffmpeg (experimental)')
|
||||
dest='hls_prefer_native', action='store_true', default=None,
|
||||
help='Use the native HLS downloader instead of ffmpeg')
|
||||
downloader.add_option(
|
||||
'--hls-prefer-ffmpeg',
|
||||
dest='hls_prefer_native', action='store_false', default=None,
|
||||
help='Use ffmpeg instead of the native HLS downloader')
|
||||
downloader.add_option(
|
||||
'--hls-use-mpegts',
|
||||
dest='hls_use_mpegts', action='store_true',
|
||||
|
||||
@@ -389,23 +389,30 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
def run(self, info):
|
||||
metadata = {}
|
||||
if info.get('title') is not None:
|
||||
metadata['title'] = info['title']
|
||||
if info.get('upload_date') is not None:
|
||||
metadata['date'] = info['upload_date']
|
||||
if info.get('artist') is not None:
|
||||
metadata['artist'] = info['artist']
|
||||
elif info.get('uploader') is not None:
|
||||
metadata['artist'] = info['uploader']
|
||||
elif info.get('uploader_id') is not None:
|
||||
metadata['artist'] = info['uploader_id']
|
||||
if info.get('description') is not None:
|
||||
metadata['description'] = info['description']
|
||||
metadata['comment'] = info['description']
|
||||
if info.get('webpage_url') is not None:
|
||||
metadata['purl'] = info['webpage_url']
|
||||
if info.get('album') is not None:
|
||||
metadata['album'] = info['album']
|
||||
|
||||
def add(meta_list, info_list=None):
|
||||
if not info_list:
|
||||
info_list = meta_list
|
||||
if not isinstance(meta_list, (list, tuple)):
|
||||
meta_list = (meta_list,)
|
||||
if not isinstance(info_list, (list, tuple)):
|
||||
info_list = (info_list,)
|
||||
for info_f in info_list:
|
||||
if info.get(info_f) is not None:
|
||||
for meta_f in meta_list:
|
||||
metadata[meta_f] = info[info_f]
|
||||
break
|
||||
|
||||
add('title', ('track', 'title'))
|
||||
add('date', 'upload_date')
|
||||
add(('description', 'comment'), 'description')
|
||||
add('purl', 'webpage_url')
|
||||
add('track', 'track_number')
|
||||
add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
|
||||
add('genre')
|
||||
add('album')
|
||||
add('album_artist')
|
||||
add('disc', 'disc_number')
|
||||
|
||||
if not metadata:
|
||||
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
|
||||
|
||||
@@ -1540,44 +1540,46 @@ def parse_duration(s):
|
||||
|
||||
s = s.strip()
|
||||
|
||||
m = re.match(
|
||||
r'''(?ix)(?:P?T)?
|
||||
(?:
|
||||
(?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
|
||||
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
|
||||
|
||||
\s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
|
||||
(?:
|
||||
days, hours, mins, secs, ms = [None] * 5
|
||||
m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s)
|
||||
if m:
|
||||
days, hours, mins, secs, ms = m.groups()
|
||||
else:
|
||||
m = re.match(
|
||||
r'''(?ix)(?:P?T)?
|
||||
(?:
|
||||
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
|
||||
(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
|
||||
(?P<days>[0-9]+)\s*d(?:ays?)?\s*
|
||||
)?
|
||||
(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
|
||||
)?
|
||||
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
|
||||
)$''', s)
|
||||
if not m:
|
||||
return None
|
||||
res = 0
|
||||
if m.group('only_mins'):
|
||||
return float_or_none(m.group('only_mins'), invscale=60)
|
||||
if m.group('only_hours'):
|
||||
return float_or_none(m.group('only_hours'), invscale=60 * 60)
|
||||
if m.group('secs'):
|
||||
res += int(m.group('secs'))
|
||||
if m.group('mins_reversed'):
|
||||
res += int(m.group('mins_reversed')) * 60
|
||||
if m.group('mins'):
|
||||
res += int(m.group('mins')) * 60
|
||||
if m.group('hours'):
|
||||
res += int(m.group('hours')) * 60 * 60
|
||||
if m.group('hours_reversed'):
|
||||
res += int(m.group('hours_reversed')) * 60 * 60
|
||||
if m.group('days'):
|
||||
res += int(m.group('days')) * 24 * 60 * 60
|
||||
if m.group('ms'):
|
||||
res += float(m.group('ms'))
|
||||
return res
|
||||
(?:
|
||||
(?P<hours>[0-9]+)\s*h(?:ours?)?\s*
|
||||
)?
|
||||
(?:
|
||||
(?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
|
||||
)?
|
||||
(?:
|
||||
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
|
||||
)?$''', s)
|
||||
if m:
|
||||
days, hours, mins, secs, ms = m.groups()
|
||||
else:
|
||||
m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s)
|
||||
if m:
|
||||
hours, mins = m.groups()
|
||||
else:
|
||||
return None
|
||||
|
||||
duration = 0
|
||||
if secs:
|
||||
duration += float(secs)
|
||||
if mins:
|
||||
duration += float(mins) * 60
|
||||
if hours:
|
||||
duration += float(hours) * 60 * 60
|
||||
if days:
|
||||
duration += float(days) * 24 * 60 * 60
|
||||
if ms:
|
||||
duration += float(ms)
|
||||
return duration
|
||||
|
||||
|
||||
def prepend_extension(filename, ext, expected_real_ext=None):
|
||||
@@ -1933,6 +1935,9 @@ def error_to_compat_str(err):
|
||||
|
||||
|
||||
def mimetype2ext(mt):
|
||||
if mt is None:
|
||||
return None
|
||||
|
||||
ext = {
|
||||
'audio/mp4': 'm4a',
|
||||
}.get(mt)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.04.19'
|
||||
__version__ = '2016.05.01'
|
||||
|
||||
Reference in New Issue
Block a user