1
0
mirror of https://source.netsyms.com/Mirrors/youtube-dl synced 2026-05-23 22:30:31 +00:00

Compare commits

...

20 Commits

Author SHA1 Message Date
Sergey M․
5fc2757682 release 2016.06.18 2016-06-18 06:00:05 +07:00
Sergey M․
e3944c2621 [pornhd] Add working test 2016-06-18 05:50:17 +07:00
Sergey M․
667d96480b [pornhd] Detect removed videos and modernize 2016-06-18 05:42:20 +07:00
Sergey M․
e6fe993c31 [pornhd] Improve formats extraction 2016-06-18 05:37:53 +07:00
Sergey M․
d0d93f76ea [pornhd] Fix metadata extraction 2016-06-18 05:30:46 +07:00
Sergey M․
20a6a154fe [mtv] Use compat_xpath and fix FutureWarning 2016-06-18 04:46:26 +07:00
Sergey M․
f011876076 [nickde] Add extractor (Closes #9778) 2016-06-18 04:40:48 +07:00
Sergey M․
6929569403 [mitele] Extract series metadata and make title more robust (Closes #9758) 2016-06-18 04:06:19 +07:00
Sergey M․
eb451890da [carambatv] Add extractor (Closes #9815) 2016-06-18 03:04:14 +07:00
Sergey M․
ded7511a70 [bbccouk] Add support for playlists (Closes #9812) 2016-06-17 23:42:52 +07:00
Sergey M․
d2161cade5 release 2016.06.16 2016-06-16 22:40:55 +07:00
Sergey M․
27e5fa8198 [cda] Fix extraction (Closes #9803) 2016-06-16 22:33:12 +07:00
Yen Chi Hsuan
efbd1eb51a [wimp] Fix extraction and update _TESTS 2016-06-16 12:27:21 +08:00
Yen Chi Hsuan
369ff75081 [jwplatform] Improved JWPlayer support 2016-06-16 12:26:45 +08:00
Yen Chi Hsuan
47212f7bcb [utils] Don't transform numbers not starting with a zero
Fix test_Viidea and maybe others
2016-06-16 11:00:54 +08:00
Sergey M․
4c93ee8d14 [imdb] Improve _VALID_URL (Closes #9788) 2016-06-15 22:34:55 +07:00
Yen Chi Hsuan
8bc4dbb1af [wrzuta.pl] Detect error and update _TESTS 2016-06-14 11:14:59 +08:00
Sergey M․
6c3760292c [pornhub] Improve title extraction (Closes #9777) 2016-06-14 04:57:59 +07:00
Sergey M․
4cef70db6c [devscripts/release.sh] Add flag for gpg-sign commits 2016-06-14 03:16:56 +07:00
Sergey M․
ff4af6ec59 [lynda] Remove superfluous _NETRC_MACHINE 2016-06-14 02:49:33 +07:00
20 changed files with 407 additions and 68 deletions

View File

@@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.14** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.18**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.06.14 [debug] youtube-dl version 2016.06.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@@ -15,6 +15,7 @@
set -e set -e
skip_tests=true skip_tests=true
gpg_sign_commits=""
buildserver='localhost:8142' buildserver='localhost:8142'
while true while true
@@ -24,6 +25,10 @@ case "$1" in
skip_tests=false skip_tests=false
shift shift
;; ;;
--gpg-sign-commits|-S)
gpg_sign_commits="-S"
shift
;;
--buildserver) --buildserver)
buildserver="$2" buildserver="$2"
shift 2 shift 2
@@ -69,7 +74,7 @@ sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py
git commit -m "release $version" git commit $gpg_sign_commits -m "release $version"
/bin/echo -e "\n### Now tagging, signing and pushing..." /bin/echo -e "\n### Now tagging, signing and pushing..."
git tag -s -m "Release $version" "$version" git tag -s -m "Release $version" "$version"
@@ -116,7 +121,7 @@ git clone --branch gh-pages --single-branch . build/gh-pages
"$ROOT/devscripts/gh-pages/update-copyright.py" "$ROOT/devscripts/gh-pages/update-copyright.py"
"$ROOT/devscripts/gh-pages/update-sites.py" "$ROOT/devscripts/gh-pages/update-sites.py"
git add *.html *.html.in update git add *.html *.html.in update
git commit -m "release $version" git commit $gpg_sign_commits -m "release $version"
git push "$ROOT" gh-pages git push "$ROOT" gh-pages
git push "$ORIGIN_URL" gh-pages git push "$ORIGIN_URL" gh-pages
) )

View File

@@ -44,8 +44,8 @@
- **appletrailers:section** - **appletrailers:section**
- **archive.org**: archive.org videos - **archive.org**: archive.org videos
- **ARD** - **ARD**
- **ARD:mediathek**: Saarländischer Rundfunk
- **ARD:mediathek** - **ARD:mediathek**
- **ARD:mediathek**: Saarländischer Rundfunk
- **arte.tv** - **arte.tv**
- **arte.tv:+7** - **arte.tv:+7**
- **arte.tv:cinema** - **arte.tv:cinema**
@@ -74,6 +74,8 @@
- **bbc**: BBC - **bbc**: BBC
- **bbc.co.uk**: BBC iPlayer - **bbc.co.uk**: BBC iPlayer
- **bbc.co.uk:article**: BBC articles - **bbc.co.uk:article**: BBC articles
- **bbc.co.uk:iplayer:playlist**
- **bbc.co.uk:playlist**
- **BeatportPro** - **BeatportPro**
- **Beeg** - **Beeg**
- **BehindKink** - **BehindKink**
@@ -104,6 +106,8 @@
- **canalc2.tv** - **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **Canvas** - **Canvas**
- **CarambaTV**
- **CarambaTVPage**
- **CBC** - **CBC**
- **CBCPlayer** - **CBCPlayer**
- **CBS** - **CBS**
@@ -432,6 +436,7 @@
- **nhl.com:videocenter** - **nhl.com:videocenter**
- **nhl.com:videocenter:category**: NHL videocenter category - **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com** - **nick.com**
- **nick.de**
- **niconico**: ニコニコ動画 - **niconico**: ニコニコ動画
- **NiconicoPlaylist** - **NiconicoPlaylist**
- **njoy**: N-JOY - **njoy**: N-JOY

View File

@@ -640,6 +640,9 @@ class TestUtil(unittest.TestCase):
"1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
}''') }''')
inp = '''{"foo":101}'''
self.assertEqual(js_to_json(inp), '''{"foo":101}''')
def test_js_to_json_edgecases(self): def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})

View File

@@ -31,7 +31,7 @@ class BBCCoUkIE(InfoExtractor):
music/clips[/#]| music/clips[/#]|
radio/player/ radio/player/
) )
(?P<id>%s) (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX ''' % _ID_REGEX
_MEDIASELECTOR_URLS = [ _MEDIASELECTOR_URLS = [
@@ -698,7 +698,9 @@ class BBCIE(BBCCoUkIE):
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url) EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
else super(BBCIE, cls).suitable(url))
def _extract_from_media_meta(self, media_meta, video_id): def _extract_from_media_meta(self, media_meta, video_id):
# Direct links to media in media metadata (e.g. # Direct links to media in media metadata (e.g.
@@ -975,3 +977,72 @@ class BBCCoUkArticleIE(InfoExtractor):
r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)] r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
return self.playlist_result(entries, playlist_id, title, description) return self.playlist_result(entries, playlist_id, title, description)
class BBCCoUkPlaylistBaseIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
entries = [
self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
for video_id in re.findall(
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
title, description = self._extract_title_and_description(webpage)
return self.playlist_result(entries, playlist_id, title, description)
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
IE_NAME = 'bbc.co.uk:iplayer:playlist'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
_TEST = {
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
'info_dict': {
'id': 'b05rcz9v',
'title': 'The Disappearance',
'description': 'French thriller serial about a missing teenager.',
},
'playlist_mincount': 6,
}
def _extract_title_and_description(self, webpage):
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
description = self._search_regex(
r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
webpage, 'description', fatal=False, group='value')
return title, description
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
IE_NAME = 'bbc.co.uk:playlist'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
_URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
_VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
_TESTS = [{
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
'info_dict': {
'id': 'b05rcz9v',
'title': 'The Disappearance - Clips - BBC Four',
'description': 'French thriller serial about a missing teenager.',
},
'playlist_mincount': 7,
}, {
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
'only_matching': True,
}, {
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
'only_matching': True,
}, {
'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
'only_matching': True,
}]
def _extract_title_and_description(self, webpage):
title = self._og_search_title(webpage, fatal=False)
description = self._og_search_description(webpage)
return title, description

View File

@@ -0,0 +1,88 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
int_or_none,
try_get,
)
class CarambaTVIE(InfoExtractor):
_VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://video1.carambatv.ru/v/191910501',
'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
'info_dict': {
'id': '191910501',
'ext': 'mp4',
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 2678.31,
},
}, {
'url': 'carambatv:191910501',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id,
video_id)
title = video['title']
base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id
formats = [{
'url': base_url + f['fn'],
'height': int_or_none(f.get('height')),
'format_id': '%sp' % f['height'] if f.get('height') else None,
} for f in video['qualities'] if f.get('fn')]
self._sort_formats(formats)
thumbnail = video.get('splash')
duration = float_or_none(try_get(
video, lambda x: x['annotations'][0]['end_time'], compat_str))
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
class CarambaTVPageIE(InfoExtractor):
_VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
'md5': '',
'info_dict': {
'id': '191910501',
'ext': 'mp4',
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 2678.31,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._og_search_property('video:iframe', webpage, default=None)
if not video_url:
video_id = self._search_regex(
r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
webpage, 'video id')
video_url = 'carambatv:%s' % video_id
return self.url_result(video_url, CarambaTVIE.ie_key())

View File

@@ -58,7 +58,8 @@ class CDAIE(InfoExtractor):
def extract_format(page, version): def extract_format(page, version):
unpacked = decode_packed_codes(page) unpacked = decode_packed_codes(page)
format_url = self._search_regex( format_url = self._search_regex(
r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False) r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
'%s url' % version, fatal=False, group='url')
if not format_url: if not format_url:
return return
f = { f = {
@@ -75,7 +76,8 @@ class CDAIE(InfoExtractor):
info_dict['formats'].append(f) info_dict['formats'].append(f)
if not info_dict['duration']: if not info_dict['duration']:
info_dict['duration'] = parse_duration(self._search_regex( info_dict['duration'] = parse_duration(self._search_regex(
r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False)) r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
unpacked, 'duration', fatal=False, group='duration'))
extract_format(webpage, 'default') extract_format(webpage, 'default')

View File

@@ -71,6 +71,8 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbc import ( from .bbc import (
BBCCoUkIE, BBCCoUkIE,
BBCCoUkArticleIE, BBCCoUkArticleIE,
BBCCoUkIPlayerPlaylistIE,
BBCCoUkPlaylistIE,
BBCIE, BBCIE,
) )
from .beeg import BeegIE from .beeg import BeegIE
@@ -108,6 +110,10 @@ from .camwithher import CamWithHerIE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .canvas import CanvasIE from .canvas import CanvasIE
from .carambatv import (
CarambaTVIE,
CarambaTVPageIE,
)
from .cbc import ( from .cbc import (
CBCIE, CBCIE,
CBCPlayerIE, CBCPlayerIE,
@@ -512,7 +518,10 @@ from .nhl import (
NHLVideocenterCategoryIE, NHLVideocenterCategoryIE,
NHLIE, NHLIE,
) )
from .nick import NickIE from .nick import (
NickIE,
NickDeIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninegag import NineGagIE from .ninegag import NineGagIE
from .noco import NocoIE from .noco import NocoIE

View File

@@ -12,7 +12,7 @@ from ..utils import (
class ImdbIE(InfoExtractor): class ImdbIE(InfoExtractor):
IE_NAME = 'imdb' IE_NAME = 'imdb'
IE_DESC = 'Internet Movie Database trailers' IE_DESC = 'Internet Movie Database trailers'
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)' _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.imdb.com/video/imdb/vi2524815897', 'url': 'http://www.imdb.com/video/imdb/vi2524815897',
@@ -25,6 +25,12 @@ class ImdbIE(InfoExtractor):
}, { }, {
'url': 'http://www.imdb.com/video/_/vi2524815897', 'url': 'http://www.imdb.com/video/_/vi2524815897',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897',
'only_matching': True,
}, {
'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -12,9 +12,35 @@ from ..utils import (
class JWPlatformBaseIE(InfoExtractor): class JWPlatformBaseIE(InfoExtractor):
@staticmethod
def _find_jwplayer_data(webpage):
# TODO: Merge this with JWPlayer-related codes in generic.py
mobj = re.search(
'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)',
webpage)
if mobj:
return mobj.group('options')
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
jwplayer_data = self._parse_json(
self._find_jwplayer_data(webpage), video_id)
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None): def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
# JWPlayer backward compatibility: flattened playlists
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
if 'playlist' not in jwplayer_data:
jwplayer_data = {'playlist': [jwplayer_data]}
video_data = jwplayer_data['playlist'][0] video_data = jwplayer_data['playlist'][0]
# JWPlayer backward compatibility: flattened sources
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
if 'sources' not in video_data:
video_data['sources'] = [video_data]
formats = [] formats = []
for source in video_data['sources']: for source in video_data['sources']:
source_url = self._proto_relative_url(source['file']) source_url = self._proto_relative_url(source['file'])

View File

@@ -95,7 +95,6 @@ class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda' IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos' IE_DESC = 'lynda.com videos'
_VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
_NETRC_MACHINE = 'lynda'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'

View File

@@ -1,5 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
@@ -8,6 +11,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
get_element_by_attribute, get_element_by_attribute,
int_or_none, int_or_none,
remove_start,
) )
@@ -15,7 +19,7 @@ class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es' IE_DESC = 'mitele.es'
_VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
_TEST = { _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
# MD5 is unstable # MD5 is unstable
'info_dict': { 'info_dict': {
@@ -24,10 +28,31 @@ class MiTeleIE(InfoExtractor):
'ext': 'flv', 'ext': 'flv',
'title': 'Tor, la web invisible', 'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
'series': 'Diario de',
'season': 'La redacción',
'episode': 'Programa 144',
'thumbnail': 're:(?i)^https?://.*\.jpg$', 'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913, 'duration': 2913,
}, },
} }, {
# no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/',
'info_dict': {
'id': 'eLZSwoEd1S3pVyUm8lc6F',
'display_id': 'programa-226',
'ext': 'flv',
'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
'series': 'Cuarto Milenio',
'season': 'Temporada 6',
'episode': 'Programa 226',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 7312,
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
@@ -70,7 +95,22 @@ class MiTeleIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
title = self._search_regex( title = self._search_regex(
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title') r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
webpage, 'title', default=None)
mobj = re.search(r'''(?sx)
class="Destacado-text"[^>]*>.*?<h1>\s*
<span>(?P<series>[^<]+)</span>\s*
<span>(?P<season>[^<]+)</span>\s*
<span>(?P<episode>[^<]+)</span>''', webpage)
series, season, episode = mobj.groups() if mobj else [None] * 3
if not title:
if mobj:
title = '%s - %s - %s' % (series, season, episode)
else:
title = remove_start(self._search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
video_id = self._search_regex( video_id = self._search_regex(
r'data-media-id\s*=\s*"([^"]+)"', webpage, r'data-media-id\s*=\s*"([^"]+)"', webpage,
@@ -83,6 +123,9 @@ class MiTeleIE(InfoExtractor):
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': get_element_by_attribute('class', 'text', webpage), 'description': get_element_by_attribute('class', 'text', webpage),
'series': series,
'season': season,
'episode': episode,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,

View File

@@ -6,6 +6,7 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
compat_str, compat_str,
compat_xpath,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
@@ -139,9 +140,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
itemdoc, './/{http://search.yahoo.com/mrss/}category', itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:video_title') 'scheme', 'urn:mtvn:video_title')
if title_el is None: if title_el is None:
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title'))
if title_el is None: if title_el is None:
title_el = itemdoc.find('.//title') or itemdoc.find('./title') title_el = itemdoc.find(compat_xpath('.//title'))
if title_el.text is None: if title_el.text is None:
title_el = None title_el = None

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
from ..compat import compat_urllib_parse_urlencode from ..compat import compat_urllib_parse_urlencode
from ..utils import update_url_query
class NickIE(MTVServicesInfoExtractor): class NickIE(MTVServicesInfoExtractor):
@@ -61,3 +62,26 @@ class NickIE(MTVServicesInfoExtractor):
def _extract_mgid(self, webpage): def _extract_mgid(self, webpage):
return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
class NickDeIE(MTVServicesInfoExtractor):
IE_NAME = 'nick.de'
_VALID_URL = r'https?://(?:www\.)?nick\.de/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
'only_matching': True,
}, {
'url': 'http://www.nick.de/shows/342-icarly',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
mrss_url = update_url_query(self._search_regex(
r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
{'siteKey': 'nick.de'})
return self._get_videos_info_from_url(mrss_url, video_id)

View File

@@ -1,19 +1,32 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
js_to_json, js_to_json,
qualities,
) )
class PornHdIE(InfoExtractor): class PornHdIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
_TEST = { _TESTS = [{
'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5',
'info_dict': {
'id': '9864',
'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
'ext': 'mp4',
'title': 'Restroom selfie masturbation',
'description': 'md5:3748420395e03e31ac96857a8f125b2b',
'thumbnail': 're:^https?://.*\.jpg',
'view_count': int,
'age_limit': 18,
}
}, {
# removed video
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
'md5': '956b8ca569f7f4d8ec563e2c41598441', 'md5': '956b8ca569f7f4d8ec563e2c41598441',
'info_dict': { 'info_dict': {
@@ -25,8 +38,9 @@ class PornHdIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg', 'thumbnail': 're:^https?://.*\.jpg',
'view_count': int, 'view_count': int,
'age_limit': 18, 'age_limit': 18,
} },
} 'skip': 'Not available anymore',
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -38,28 +52,38 @@ class PornHdIE(InfoExtractor):
title = self._html_search_regex( title = self._html_search_regex(
[r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)', [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title') r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
description = self._html_search_regex(
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
view_count = int_or_none(self._html_search_regex(
r'(\d+) views\s*</span>', webpage, 'view count', fatal=False))
thumbnail = self._search_regex(
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
quality = qualities(['sd', 'hd']) sources = self._parse_json(js_to_json(self._search_regex(
sources = json.loads(js_to_json(self._search_regex(
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
webpage, 'sources'))) webpage, 'sources', default='{}')), video_id)
if not sources:
message = self._html_search_regex(
r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
webpage, 'error message', group='value')
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
formats = [] formats = []
for qname, video_url in sources.items(): for format_id, video_url in sources.items():
if not video_url: if not video_url:
continue continue
height = int_or_none(self._search_regex(
r'^(\d+)[pP]', format_id, 'height', default=None))
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'format_id': qname, 'format_id': format_id,
'quality': quality(qname), 'height': height,
}) })
self._sort_formats(formats) self._sort_formats(formats)
description = self._html_search_regex(
r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
webpage, 'description', fatal=False, group='value')
view_count = int_or_none(self._html_search_regex(
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
thumbnail = self._search_regex(
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,

View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools import itertools
@@ -39,7 +40,25 @@ class PornHubIE(InfoExtractor):
'dislike_count': int, 'dislike_count': int,
'comment_count': int, 'comment_count': int,
'age_limit': 18, 'age_limit': 18,
} },
}, {
# non-ASCII title
'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
'info_dict': {
'id': '1331683002',
'ext': 'mp4',
'title': '重庆婷婷女王足交',
'uploader': 'cj397186295',
'duration': 1753,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True, 'only_matching': True,
@@ -76,19 +95,25 @@ class PornHubIE(InfoExtractor):
'PornHub said: %s' % error_msg, 'PornHub said: %s' % error_msg,
expected=True, video_id=video_id) expected=True, video_id=video_id)
# video_title from flashvars contains whitespace instead of non-ASCII (see
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
title = self._html_search_meta(
'twitter:title', webpage, default=None) or self._search_regex(
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
webpage, 'title', group='title')
flashvars = self._parse_json( flashvars = self._parse_json(
self._search_regex( self._search_regex(
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
video_id) video_id)
if flashvars: if flashvars:
video_title = flashvars.get('video_title')
thumbnail = flashvars.get('image_url') thumbnail = flashvars.get('image_url')
duration = int_or_none(flashvars.get('video_duration')) duration = int_or_none(flashvars.get('video_duration'))
else: else:
video_title, thumbnail, duration = [None] * 3 title, thumbnail, duration = [None] * 3
if not video_title:
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex( video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
@@ -137,7 +162,7 @@ class PornHubIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'uploader': video_uploader, 'uploader': video_uploader,
'title': video_title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,

View File

@@ -1,29 +1,33 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE from .youtube import YoutubeIE
from .jwplatform import JWPlatformBaseIE
class WimpIE(InfoExtractor): class WimpIE(JWPlatformBaseIE):
_VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.wimp.com/maruexhausted/', 'url': 'http://www.wimp.com/maru-is-exhausted/',
'md5': 'ee21217ffd66d058e8b16be340b74883', 'md5': 'ee21217ffd66d058e8b16be340b74883',
'info_dict': { 'info_dict': {
'id': 'maruexhausted', 'id': 'maru-is-exhausted',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Maru is exhausted.', 'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8', 'description': 'md5:57e099e857c0a4ea312542b684a869b8',
} }
}, { }, {
'url': 'http://www.wimp.com/clowncar/', 'url': 'http://www.wimp.com/clowncar/',
'md5': '4e2986c793694b55b37cf92521d12bb4', 'md5': '5c31ad862a90dc5b1f023956faec13fe',
'info_dict': { 'info_dict': {
'id': 'clowncar', 'id': 'cG4CEr2aiSg',
'ext': 'webm', 'ext': 'webm',
'title': 'It\'s like a clown car.', 'title': 'Basset hound clown car...incredible!',
'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2', 'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom',
'upload_date': '20140303',
'uploader': 'Gretchen Hoey',
'uploader_id': 'gretchenandjeff1',
}, },
'add_ie': ['Youtube'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -41,14 +45,13 @@ class WimpIE(InfoExtractor):
'ie_key': YoutubeIE.ie_key(), 'ie_key': YoutubeIE.ie_key(),
} }
video_url = self._search_regex( info_dict = self._extract_jwplayer_data(
r'<video[^>]+>\s*<source[^>]+src=(["\'])(?P<url>.+?)\1', webpage, video_id, require_title=False)
webpage, 'video URL', group='url')
return { info_dict.update({
'id': video_id, 'id': video_id,
'url': video_url,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
} })
return info_dict

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
qualities, qualities,
remove_start, remove_start,
@@ -27,16 +28,17 @@ class WrzutaIE(InfoExtractor):
'uploader_id': 'laboratoriumdextera', 'uploader_id': 'laboratoriumdextera',
'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', 'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
}, },
'skip': 'Redirected to wrzuta.pl',
}, { }, {
'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty', 'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus',
'md5': 'bc78077859bea7bcfe4295d7d7fc9025', 'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d',
'info_dict': { 'info_dict': {
'id': '063jOPX5ue2', 'id': '01xBFabGXu6',
'ext': 'ogg', 'ext': 'mp3',
'title': 'Liber & Natalia Szroeder - Teraz Ty', 'title': 'James Horner - Into The Na\'vi World [Bonus]',
'duration': 203, 'description': 'md5:30a70718b2cd9df3120fce4445b0263b',
'uploader_id': 'jolka85', 'duration': 95,
'description': 'md5:2d2b6340f9188c8c4cd891580e481096', 'uploader_id': 'vexling',
}, },
}] }]
@@ -46,7 +48,10 @@ class WrzutaIE(InfoExtractor):
typ = mobj.group('typ') typ = mobj.group('typ')
uploader = mobj.group('uploader') uploader = mobj.group('uploader')
webpage = self._download_webpage(url, video_id) webpage, urlh = self._download_webpage_handle(url, video_id)
if urlh.geturl() == 'http://www.wrzuta.pl/':
raise ExtractorError('Video removed', expected=True)
quality = qualities(['SD', 'MQ', 'HQ', 'HD']) quality = qualities(['SD', 'MQ', 'HQ', 'HD'])

View File

@@ -1970,7 +1970,7 @@ def js_to_json(code):
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
/\*.*?\*/|,(?=\s*[\]}])| /\*.*?\*/|,(?=\s*[\]}])|
[a-zA-Z_][.a-zA-Z_0-9]*| [a-zA-Z_][.a-zA-Z_0-9]*|
(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
[0-9]+(?=\s*:) [0-9]+(?=\s*:)
''', fix_kv, code) ''', fix_kv, code)

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.06.14' __version__ = '2016.06.18'