mirror of
https://source.netsyms.com/Mirrors/youtube-dl
synced 2026-03-28 02:18:49 +00:00
Compare commits
25 Commits
2016.09.08
...
2016.09.11
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1c81476cbb | ||
|
|
bc9186c882 | ||
|
|
6599c72527 | ||
|
|
6bb05b32a9 | ||
|
|
fea74acad8 | ||
|
|
f01115c933 | ||
|
|
2cdbc06a1f | ||
|
|
2cb93afcd8 | ||
|
|
bfcda07a27 | ||
|
|
001a5fd3d7 | ||
|
|
1e35999c1e | ||
|
|
2512b17493 | ||
|
|
56c0ead4d3 | ||
|
|
7324243750 | ||
|
|
84a18e9b90 | ||
|
|
b29f842e0e | ||
|
|
f009fcac0d | ||
|
|
6c3affcb18 | ||
|
|
1e19ff2984 | ||
|
|
c6129feb7f | ||
|
|
bb5ebd4453 | ||
|
|
cb9cbd84ed | ||
|
|
4d5726b0d7 | ||
|
|
4614ad7b59 | ||
|
|
8d3737cda7 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.08**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.09.08
|
||||
[debug] youtube-dl version 2016.09.11
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
version 2016.09.11
|
||||
|
||||
Extractors
|
||||
+ [pornhub] Extract categories and tags (#10499)
|
||||
+ [foxnews] Support Fox News articles (#10598)
|
||||
* [iwara] Fix extraction after relaunch (#10462, #3215)
|
||||
* [newgrounds] Fix uploader extraction (#10584)
|
||||
|
||||
|
||||
version 2016.09.08
|
||||
|
||||
Extractors
|
||||
|
||||
@@ -247,7 +247,8 @@
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
- **Foxgay**
|
||||
- **FoxNews**: Fox News and Fox Business Video
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
- **foxnews:insider**
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-quoi**
|
||||
@@ -326,6 +327,7 @@
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
- **Iwara**
|
||||
- **Izlesene**
|
||||
- **JeuxVideo**
|
||||
- **Jove**
|
||||
@@ -339,6 +341,7 @@
|
||||
- **KarriereVideos**
|
||||
- **keek**
|
||||
- **KeezMovies**
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KonserthusetPlay**
|
||||
@@ -540,6 +543,7 @@
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
- **PolskieRadioCategory**
|
||||
- **PornCom**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
@@ -701,9 +705,11 @@
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleQuebec**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TheIntercept**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
@@ -725,7 +731,6 @@
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **trollvids**
|
||||
- **TruTV**
|
||||
- **Tube8**
|
||||
- **TubiTv**
|
||||
|
||||
@@ -100,6 +100,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au:iview'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
|
||||
'md5': '979d10b2939101f0d27a06b79edad536',
|
||||
@@ -112,6 +113,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1471719600,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor):
|
||||
(?:(?:www|m)\.)?canalplus\.fr|
|
||||
(?:www\.)?piwiplus\.fr|
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?c8\.fr|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
@@ -35,6 +36,7 @@ class CanalplusIE(InfoExtractor):
|
||||
'canalplus': 'cplus',
|
||||
'piwiplus': 'teletoon',
|
||||
'd8': 'd8',
|
||||
'c8': 'd8',
|
||||
'd17': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
'md5': 'ea838375a547ac787d4064d8c7860a6c',
|
||||
@@ -38,22 +40,42 @@ class CanvasIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
|
||||
'display_id': 'herbekijk-sorry-voor-alles',
|
||||
'ext': 'mp4',
|
||||
'title': 'Herbekijk Sorry voor alles',
|
||||
'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 3788.06,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, display_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._search_regex(
|
||||
title = (self._search_regex(
|
||||
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage)).strip()
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id)
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), display_id)
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
|
||||
@@ -293,6 +293,7 @@ from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
FoxNewsArticleIE,
|
||||
FoxNewsInsiderIE,
|
||||
)
|
||||
from .foxsports import FoxSportsIE
|
||||
@@ -395,6 +396,7 @@ from .ivi import (
|
||||
IviCompilationIE
|
||||
)
|
||||
from .ivideon import IvideonIE
|
||||
from .iwara import IwaraIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
@@ -407,6 +409,7 @@ from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
@@ -670,7 +673,10 @@ from .pluralsight import (
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .pokemon import PokemonIE
|
||||
from .polskieradio import PolskieRadioIE
|
||||
from .polskieradio import (
|
||||
PolskieRadioIE,
|
||||
PolskieRadioCategoryIE,
|
||||
)
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornhd import PornHdIE
|
||||
@@ -861,10 +867,12 @@ from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .telequebec import TeleQuebecIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .tfo import TFOIE
|
||||
from .theintercept import TheInterceptIE
|
||||
from .theplatform import (
|
||||
ThePlatformIE,
|
||||
@@ -893,7 +901,6 @@ from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trollvids import TrollvidsIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
|
||||
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class FoxNewsIE(AMPIE):
|
||||
IE_NAME = 'foxnews'
|
||||
IE_DESC = 'Fox News and Fox Business Video'
|
||||
_VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
@@ -66,6 +67,35 @@ class FoxNewsIE(AMPIE):
|
||||
return info
|
||||
|
||||
|
||||
class FoxNewsArticleIE(InfoExtractor):
    """Resolve a foxnews.com article page to the video referenced in it.

    The article page is only downloaded to discover the video ID; the
    extraction itself is delegated to FoxNewsIE through a URL result.
    """

    IE_NAME = 'foxnews:article'
    # The (?!v) lookahead rejects any path whose first character is "v".
    _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'

    _TEST = {
        'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
        'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
        'info_dict': {
            'id': '5116295019001',
            'ext': 'mp4',
            'title': 'Trump and Clinton asked to defend positions on Iraq War',
            'description': 'Veterans react on \'The Kelly File\'',
            'timestamp': 1473299755,
            'upload_date': '20160908',
        },
    }

    def _real_extract(self, url):
        """Return a URL result pointing at the video found on the article page."""
        article_id = self._match_id(url)
        page = self._download_webpage(url, article_id)

        # The page markup carries the video ID in a quoted data-video-id
        # attribute; the same quote character must close the value.
        embedded_id = self._html_search_regex(
            r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
            page, 'video ID', group='id')

        video_url = 'http://video.foxnews.com/v/' + embedded_id
        return self.url_result(video_url, FoxNewsIE.ie_key())
|
||||
|
||||
|
||||
class FoxNewsInsiderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:insider'
|
||||
@@ -83,6 +113,10 @@ class FoxNewsInsiderIE(InfoExtractor):
|
||||
'upload_date': '20160825',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FoxNewsIE.ie_key()],
|
||||
}
|
||||
|
||||
|
||||
77
youtube_dl/extractor/iwara.py
Normal file
77
youtube_dl/extractor/iwara.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class IwaraIE(InfoExtractor):
    """Extractor for iwara.tv video pages, including the ecchi subdomain.

    A page either exposes its media through HTML5 media tags or embeds an
    external player in an iframe (the tests cover Google Drive and YouTube);
    in the latter case the iframe URL is returned as a transparent URL result.
    """

    _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
        'md5': '1d53866b2c514b23ed69e4352fdc9839',
        'info_dict': {
            'id': 'amVwUl1EHpAD9RD',
            'ext': 'mp4',
            'title': '【MMD R-18】ガールフレンド carry_me_off',
            'age_limit': 18,
        },
    }, {
        'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
        'md5': '7e5f1f359cd51a027ba4a7b7710a50f0',
        'info_dict': {
            'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
            'ext': 'mp4',
            'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4',
            'age_limit': 18,
        },
        'add_ie': ['GoogleDrive'],
    }, {
        'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
        'md5': '1d85f1e5217d2791626cff5ec83bb189',
        'info_dict': {
            'id': '6liAP9s2Ojc',
            'ext': 'mp4',
            'age_limit': 0,
            'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
            'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
            'upload_date': '20160910',
            'uploader': 'aMMDsork',
            'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A',
        },
        'add_ie': ['Youtube'],
    }]

    def _real_extract(self, url):
        """Return the info dict for a hosted video, or defer to the iframe URL."""
        video_id = self._match_id(url)

        webpage, url_handle = self._download_webpage_handle(url, video_id)

        # The hostname is read from the response handle's URL, which may
        # differ from the requested URL.
        final_host = compat_urllib_parse_urlparse(url_handle.geturl()).hostname
        subdomain = final_host.split('.')[0]
        # ecchi is 'sexy' in Japanese
        if subdomain == 'ecchi':
            age_limit = 18
        else:
            age_limit = 0

        media_entries = self._parse_html5_media_entries(url, webpage, video_id)

        if media_entries:
            page_title = self._html_search_regex(
                r'<title>([^<]+)</title>', webpage, 'title')

            # Only the first media entry found on the page is used.
            info = media_entries[0]
            info.update({
                'id': video_id,
                'title': remove_end(page_title, ' | Iwara'),
                'age_limit': age_limit,
            })
            return info

        # No HTML5 media on the page: hand the first iframe over to another
        # extractor while keeping the age limit determined here.
        embed_url = self._html_search_regex(
            r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
            webpage, 'iframe URL', group='url')
        return {
            '_type': 'url_transparent',
            'url': embed_url,
            'age_limit': age_limit,
        }
|
||||
52
youtube_dl/extractor/ketnet.py
Normal file
52
youtube_dl/extractor/ketnet.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class KetnetIE(InfoExtractor):
    """Extractor for ketnet.be pages that define a ``playerConfig`` object."""

    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
        'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
        'info_dict': {
            'id': 'zomerse-filmpjes',
            'ext': 'mp4',
            'title': 'Gluur mee op de filmset en op Pennenzakkenrock',
            'description': 'Gluur mee met Ghost Rockers op de filmset',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
        'only_matching': True,
    }, {
        'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's ``playerConfig`` JSON object."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Non-greedy match up to the first "}" that is followed by ";".
        raw_config = self._search_regex(
            r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage, 'player config')
        player_config = self._parse_json(raw_config, video_id)

        # Title and HLS source are looked up with [] on purpose: extraction
        # fails if either is absent, while the remaining fields are optional.
        title = player_config['title']
        hls_url = player_config['source']['hls']

        formats = self._extract_m3u8_formats(
            hls_url, video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
        for field, config_key in (
                ('description', 'description'),
                ('thumbnail', 'image'),
                ('series', 'program'),
                ('episode', 'episode')):
            info[field] = player_config.get(config_key)
        return info
|
||||
@@ -1,8 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
@@ -12,8 +15,10 @@ from ..utils import (
|
||||
class LRTIE(InfoExtractor):
|
||||
IE_NAME = 'lrt.lt'
|
||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# m3u8 download
|
||||
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
|
||||
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
|
||||
'info_dict': {
|
||||
'id': '54391',
|
||||
'ext': 'mp4',
|
||||
@@ -23,20 +28,45 @@ class LRTIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
}, {
|
||||
# direct mp3 download
|
||||
'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/',
|
||||
'md5': '389da8ca3cad0f51d12bed0c844f6a0a',
|
||||
'info_dict': {
|
||||
'id': '1013074524',
|
||||
'ext': 'mp3',
|
||||
'title': 'Kita tema 2016-09-05 15:05',
|
||||
'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
|
||||
'duration': 3008,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - LRT')
|
||||
m3u8_url = self._search_regex(
|
||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
|
||||
webpage, 'm3u8 url', group='url')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
|
||||
formats = []
|
||||
for _, file_url in re.findall(
|
||||
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||
ext = determine_ext(file_url)
|
||||
if ext not in ('m3u8', 'mp3'):
|
||||
continue
|
||||
# mp3 served as m3u8 produces stuttered media file
|
||||
if ext == 'm3u8' and '.mp3' in file_url:
|
||||
continue
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
fatal=False))
|
||||
elif ext == 'mp3':
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NewgroundsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
||||
'url': 'https://www.newgrounds.com/audio/listen/549479',
|
||||
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
||||
'info_dict': {
|
||||
'id': '549479',
|
||||
@@ -18,7 +15,7 @@ class NewgroundsIE(InfoExtractor):
|
||||
'uploader': 'Burn7',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.newgrounds.com/portal/view/673111',
|
||||
'url': 'https://www.newgrounds.com/portal/view/673111',
|
||||
'md5': '3394735822aab2478c31b1004fe5e5bc',
|
||||
'info_dict': {
|
||||
'id': '673111',
|
||||
@@ -29,24 +26,20 @@ class NewgroundsIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
music_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, music_id)
|
||||
media_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^>]+)</title>', webpage, 'title')
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
[r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'],
|
||||
webpage, 'uploader')
|
||||
r'Author\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False)
|
||||
|
||||
music_url_json_string = self._html_search_regex(
|
||||
r'({"url":"[^"]+"),', webpage, 'music url') + '}'
|
||||
music_url_json = json.loads(music_url_json_string)
|
||||
music_url = music_url_json['url']
|
||||
music_url = self._parse_json(self._search_regex(
|
||||
r'"url":("[^"]+"),', webpage, ''), media_id)
|
||||
|
||||
return {
|
||||
'id': music_id,
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'url': music_url,
|
||||
'uploader': uploader,
|
||||
|
||||
@@ -44,7 +44,20 @@ class NineNowIE(InfoExtractor):
|
||||
page_data = self._parse_json(self._search_regex(
|
||||
r'window\.__data\s*=\s*({.*?});', webpage,
|
||||
'page data'), display_id)
|
||||
common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip')
|
||||
|
||||
for kind in ('episode', 'clip'):
|
||||
current_key = page_data.get(kind, {}).get(
|
||||
'current%sKey' % kind.capitalize())
|
||||
if not current_key:
|
||||
continue
|
||||
cache = page_data.get(kind, {}).get('%sCache' % kind, {})
|
||||
if not cache:
|
||||
continue
|
||||
common_data = (cache.get(current_key) or list(cache.values())[0])[kind]
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Unable to find video data')
|
||||
|
||||
video_data = common_data['video']
|
||||
|
||||
if video_data.get('drm'):
|
||||
|
||||
@@ -60,7 +60,7 @@ class OpenloadIE(InfoExtractor):
|
||||
if j >= 33 and j <= 126:
|
||||
j = ((j + 14) % 94) + 33
|
||||
if idx == len(enc_data) - 1:
|
||||
j += 1
|
||||
j += 3
|
||||
video_url_chars += compat_chr(j)
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
|
||||
|
||||
@@ -1,53 +1,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ParliamentLiveUKIE(InfoExtractor):
|
||||
IE_NAME = 'parliamentlive.tv'
|
||||
IE_DESC = 'UK parliament videos'
|
||||
_VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
|
||||
'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
|
||||
'info_dict': {
|
||||
'id': '15121',
|
||||
'ext': 'asf',
|
||||
'title': 'hoc home affairs committee, 18 mar 2014.pm',
|
||||
'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
|
||||
'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Home Affairs Committee',
|
||||
'uploader_id': 'FFMPEG-01',
|
||||
'timestamp': 1422696664,
|
||||
'upload_date': '20150131',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires mplayer (mms)
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
asx_url = self._html_search_regex(
|
||||
r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
|
||||
'metadata URL')
|
||||
asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
|
||||
video_url = asx.find('.//REF').attrib['HREF']
|
||||
|
||||
title = self._search_regex(
|
||||
r'''(?x)player\.setClipDetails\(
|
||||
(?:(?:[0-9]+|"[^"]+"),\s*){2}
|
||||
"([^"]+",\s*"[^"]+)"
|
||||
''',
|
||||
webpage, 'title').replace('", "', ', ')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
|
||||
webpage, 'description')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
|
||||
widget_config = self._parse_json(self._search_regex(
|
||||
r'kWidgetConfig\s*=\s*({.+});',
|
||||
webpage, 'kaltura widget config'), video_id)
|
||||
kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id'])
|
||||
event_title = self._download_json(
|
||||
'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'ext': 'asf',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'title': event_title,
|
||||
'description': '',
|
||||
'url': kaltura_url,
|
||||
'ie_key': 'Kaltura',
|
||||
}
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
@@ -97,3 +100,81 @@ class PolskieRadioIE(InfoExtractor):
|
||||
description = strip_or_none(self._og_search_description(webpage))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
||||
|
||||
class PolskieRadioCategoryIE(InfoExtractor):
    """Playlist extractor for polskieradio.pl category pages.

    Each article linked from the category listing is emitted as a URL result
    for PolskieRadioIE; further listing pages are reached via "next" links.
    """

    _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA',
        'info_dict': {
            'id': '5102',
            'title': 'HISTORIA ŻYWA',
        },
        'playlist_mincount': 38,
    }, {
        'url': 'http://www.polskieradio.pl/7/4807',
        'info_dict': {
            'id': '4807',
            'title': 'Vademecum 1050. rocznicy Chrztu Polski'
        },
        'playlist_mincount': 5
    }, {
        'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source',
        'only_matching': True
    }, {
        'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow',
        'info_dict': {
            'id': '4143',
            'title': 'Kierunek Kraków',
        },
        'playlist_mincount': 61
    }, {
        'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka',
        'info_dict': {
            'id': '214',
            'title': 'Muzyka',
        },
        'playlist_mincount': 61
    }, {
        'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA',
        'only_matching': True,
    }, {
        'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept only URLs that PolskieRadioIE does not already claim."""
        if PolskieRadioIE.suitable(url):
            return False
        return super(PolskieRadioCategoryIE, cls).suitable(url)

    def _entries(self, url, page, category_id):
        """Yield one URL result per article link, walking all listing pages.

        ``page`` is the already downloaded first listing page; subsequent
        pages are downloaded while a "next" link is present.
        """
        article_re = r'(?s)<article[^>]+>.*?(<a[^>]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>'
        next_re = r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1'

        content = page
        # The first page was fetched by the caller, so numbering of the
        # pages downloaded here starts at 2.
        page_num = 2
        while True:
            for anchor_tag, entry_id in re.findall(article_re, content):
                attrs = extract_attributes(anchor_tag)
                href = attrs.get('href')
                if href:
                    yield self.url_result(
                        compat_urlparse.urljoin(url, href),
                        PolskieRadioIE.ie_key(), entry_id, attrs.get('title'))

            next_match = re.search(next_re, content)
            if next_match is None:
                return

            content = self._download_webpage(
                compat_urlparse.urljoin(url, next_match.group('url')),
                category_id, 'Downloading page %s' % page_num)
            page_num += 1

    def _real_extract(self, url):
        """Return a playlist result for the category at ``url``."""
        category_id = self._match_id(url)
        webpage = self._download_webpage(url, category_id)

        # The title is the first of three " - " separated parts of <title>;
        # it is optional, so a failed match does not abort extraction.
        title = self._html_search_regex(
            r'<title>([^<]+) - [^<]+ - [^<]+</title>',
            webpage, 'title', fatal=False)

        entries = self._entries(url, webpage, category_id)
        return self.playlist_result(entries, category_id, title)
|
||||
|
||||
@@ -15,6 +15,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
@@ -48,6 +49,8 @@ class PornHubIE(InfoExtractor):
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
},
|
||||
}, {
|
||||
# non-ASCII title
|
||||
@@ -63,6 +66,8 @@ class PornHubIE(InfoExtractor):
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -183,6 +188,15 @@ class PornHubIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
page_params = self._parse_json(self._search_regex(
|
||||
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
|
||||
webpage, 'page parameters', group='data', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
tags = categories = None
|
||||
if page_params:
|
||||
tags = page_params.get('tags', '').split(',')
|
||||
categories = page_params.get('categories', '').split(',')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
@@ -195,6 +209,8 @@ class PornHubIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
}
|
||||
|
||||
|
||||
|
||||
36
youtube_dl/extractor/telequebec.py
Normal file
36
youtube_dl/extractor/telequebec.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class TeleQuebecIE(InfoExtractor):
    """Extractor for zonevideo.telequebec.tv media pages.

    Metadata comes from the Télé-Québec media API; the stream itself is
    delegated to the LimelightMedia extractor via a transparent URL result.
    """

    _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
    _TEST = {
        'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york',
        'md5': 'fe95a0957e5707b1b01f5013e725c90f',
        'info_dict': {
            'id': '20984',
            'ext': 'mp4',
            'title': 'Le couronnement de New York',
            'description': 'md5:f5b3d27a689ec6c1486132b2d687d432',
            'upload_date': '20160220',
            'timestamp': 1455965438,
        }
    }

    def _real_extract(self, url):
        """Return a url_transparent result for the media identified by ``url``."""
        media_id = self._match_id(url)
        media_data = self._download_json(
            'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
            media_id)['media']

        # The description is optional metadata: tolerate a missing key, a
        # null value or an empty list instead of failing with
        # IndexError/TypeError on the [0] lookup.
        descriptions = media_data.get('descriptions') or []
        first_description = descriptions[0] if descriptions else None
        description = (
            first_description.get('text')
            if isinstance(first_description, dict) else None)

        return {
            '_type': 'url_transparent',
            'id': media_id,
            # Mandatory fields are looked up with [] so that their absence
            # fails the extraction.
            'url': 'limelight:media:' + media_data['streamInfo']['sourceId'],
            'title': media_data['title'],
            'description': description,
            # Second argument is the scale passed to int_or_none for the
            # "durationInMilliseconds" value.
            'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000),
            'ie_key': 'LimelightMedia',
        }
|
||||
53
youtube_dl/extractor/tfo.py
Normal file
53
youtube_dl/extractor/tfo.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TFOIE(InfoExtractor):
    # Extractor for tfo.org video pages.  Video metadata comes from the
    # site's get_infos web API, which must be called with the value of the
    # 'tfo-session' cookie; playback is delegated to the LimelightMedia
    # extractor through a 'url_transparent' result.
    _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
        'md5': '47c987d0515561114cf03d1226a9d4c7',
        'info_dict': {
            'id': '100463871',
            'ext': 'mp4',
            'title': 'Video Game Hackathon',
            'description': 'md5:558afeba217c6c8d96c60e5421795c07',
            'upload_date': '20160212',
            'timestamp': 1455310233,
        }
    }

    def _real_extract(self, url):
        """Return a url_transparent result for the video referenced by url.

        Raises ExtractorError when the session cookie cannot be obtained or
        when the API reports a failure ('success' == 0), in which case the
        message returned by the site is relayed to the user.
        """
        video_id = self._match_id(url)

        # A HEAD request to the front page is issued first so that the
        # cookie jar can pick up the 'tfo-session' cookie read just below.
        self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id)
        session_cookie = self._get_cookies('http://www.tfo.org/').get('tfo-session')
        if session_cookie is None:
            # Fail with a meaningful message instead of a bare KeyError.
            raise ExtractorError('Unable to obtain the tfo-session cookie')

        infos = self._download_json(
            'http://www.tfo.org/api/web/video/get_infos', video_id, data=json.dumps({
                'product_id': video_id,
            }).encode(), headers={
                'X-tfo-session': session_cookie.value,
            })
        if infos.get('success') == 0:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True)
        video_data = infos['data']

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'limelight:media:' + video_data['llid'],
            'title': video_data['title'],
            'description': video_data.get('description'),
            'series': video_data.get('collection'),
            'season_number': int_or_none(video_data.get('season')),
            'episode_number': int_or_none(video_data.get('episode')),
            'duration': int_or_none(video_data.get('duration')),
            'ie_key': 'LimelightMedia',
        }
|
||||
@@ -1,36 +0,0 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .nuevo import NuevoBaseIE
|
||||
|
||||
|
||||
class TrollvidsIE(NuevoBaseIE):
    # Extractor for trollvids.com video pages, built on the shared Nuevo
    # player support provided by NuevoBaseIE.
    _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
    IE_NAME = 'trollvids'
    _TEST = {
        'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
        'md5': '1d53866b2c514b23ed69e4352fdc9839',
        'info_dict': {
            'id': '2349002',
            'ext': 'mp4',
            'title': '【MMD R-18】ガールフレンド carry_me_off',
            'age_limit': 18,
            'duration': 216.78,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a trollvids.com video URL.

        The numeric id and the display id are both taken from the URL; the
        remaining fields come from the Nuevo player config, to which the
        display id and a fixed age limit of 18 are added.
        """
        video_id, display_id = re.match(
            self._VALID_URL, url).group('id', 'display_id')

        config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id
        info = self._extract_nuevo(config_url, video_id)

        # Complete the result returned by the Nuevo helper.
        info['display_id'] = display_id
        info['age_limit'] = 18
        return info
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
@@ -21,7 +23,13 @@ class Tube8IE(KeezMoviesIE):
|
||||
'title': 'Kasia music video',
|
||||
'age_limit': 18,
|
||||
'duration': 230,
|
||||
'categories': ['Teen'],
|
||||
'tags': ['dancing'],
|
||||
},
|
||||
'params': {
|
||||
'proxy': '127.0.0.1:8118',
|
||||
}
|
||||
|
||||
}, {
|
||||
'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
|
||||
'only_matching': True,
|
||||
@@ -51,6 +59,17 @@ class Tube8IE(KeezMoviesIE):
|
||||
r'<span id="allCommentsCount">(\d+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
category = self._search_regex(
|
||||
r'Category:\s*</strong>\s*<a[^>]+href=[^>]+>([^<]+)',
|
||||
webpage, 'category', fatal=False)
|
||||
categories = [category] if category else None
|
||||
|
||||
tags_str = self._search_regex(
|
||||
r'(?s)Tags:\s*</strong>(.+?)</(?!a)',
|
||||
webpage, 'tags', fatal=False)
|
||||
tags = [t for t in re.findall(
|
||||
r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None
|
||||
|
||||
info.update({
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
@@ -58,6 +77,8 @@ class Tube8IE(KeezMoviesIE):
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
'tags': tags,
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
@@ -367,6 +367,10 @@ class ViafreeIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [TVPlayIE.ie_key()],
|
||||
}, {
|
||||
# Different og:image URL schema
|
||||
'url': 'www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
|
||||
'only_matching': True,
|
||||
@@ -384,14 +388,35 @@ class ViafreeIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)window\.App\s*=\s*({.+?})\s*;\s*</script',
|
||||
webpage, 'data', default='{}'),
|
||||
video_id, transform_source=lambda x: re.sub(
|
||||
r'(?s)function\s+[a-zA-Z_][\da-zA-Z_]*\s*\([^)]*\)\s*{[^}]*}\s*',
|
||||
'null', x), fatal=False)
|
||||
|
||||
video_id = None
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
if thumbnail:
|
||||
video_id = self._search_regex(
|
||||
r'https?://[^/]+/imagecache/(?:[^/]+/)+seasons/\d+/(\d{6,})/',
|
||||
thumbnail, 'video id', default=None)
|
||||
if data:
|
||||
video_id = try_get(
|
||||
data, lambda x: x['context']['dispatcher']['stores'][
|
||||
'ContentPageProgramStore']['currentVideo']['id'],
|
||||
compat_str)
|
||||
|
||||
# Fallback #1 (extract from og:image URL schema)
|
||||
if not video_id:
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
if thumbnail:
|
||||
video_id = self._search_regex(
|
||||
# Patterns seen:
|
||||
# http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/inbox/765166/a2e95e5f1d735bab9f309fa345cc3f25.jpg
|
||||
# http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/seasons/15204/758770/4a5ba509ca8bc043e1ebd1a76131cdf2.jpg
|
||||
r'https?://[^/]+/imagecache/(?:[^/]+/)+(\d{6,})/',
|
||||
thumbnail, 'video id', default=None)
|
||||
|
||||
# Fallback #2. Extract from raw JSON string.
|
||||
# May extract wrong video id if relatedClips is present.
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.09.08'
|
||||
__version__ = '2016.09.11'
|
||||
|
||||
Reference in New Issue
Block a user