mirror of
https://source.netsyms.com/Mirrors/youtube-dl
synced 2026-03-29 13:13:53 +00:00
Compare commits
68 Commits
2016.04.06
...
2016.04.13
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b0ba11cc64 | ||
|
|
75af5d59ae | ||
|
|
b969d12490 | ||
|
|
466a614537 | ||
|
|
ffa2cecf72 | ||
|
|
a837416025 | ||
|
|
c9d448876f | ||
|
|
8865b8abfd | ||
|
|
c77a0c01cb | ||
|
|
12355ac473 | ||
|
|
49f523ca50 | ||
|
|
4a903b93a9 | ||
|
|
13267a2be3 | ||
|
|
134c207e3f | ||
|
|
0f56bd2178 | ||
|
|
dfbc7f7f3f | ||
|
|
7d58ea7c5b | ||
|
|
452908b257 | ||
|
|
5899e988d5 | ||
|
|
4a121d29bb | ||
|
|
7ebc36900d | ||
|
|
d7eb052fa2 | ||
|
|
a6d6722c8f | ||
|
|
66fa495868 | ||
|
|
443285aabe | ||
|
|
de728757ad | ||
|
|
f44c276842 | ||
|
|
a1fa60a934 | ||
|
|
49caf3307f | ||
|
|
6a801f4470 | ||
|
|
61dd350a04 | ||
|
|
eb9c3edd5e | ||
|
|
95153a960d | ||
|
|
6c4c7539f2 | ||
|
|
c991106706 | ||
|
|
dae2a058de | ||
|
|
c05025fdd7 | ||
|
|
bfe96d7bea | ||
|
|
ab481b48e5 | ||
|
|
92c7f3157a | ||
|
|
cacd996662 | ||
|
|
bffb245a48 | ||
|
|
680efb6723 | ||
|
|
5a9858bfa9 | ||
|
|
8a5dc1c1e1 | ||
|
|
e0986e31cf | ||
|
|
6b97ca96fc | ||
|
|
c1ce6acdd7 | ||
|
|
0d778b1db9 | ||
|
|
779822d945 | ||
|
|
1b3d5e05a8 | ||
|
|
e52d7f85f2 | ||
|
|
568d2f78d6 | ||
|
|
2f2fcf1a33 | ||
|
|
bacec0397f | ||
|
|
3c6c7e7d7e | ||
|
|
fb38aa8b53 | ||
|
|
18da24634c | ||
|
|
a134426d61 | ||
|
|
a64c0c9b06 | ||
|
|
56019444cb | ||
|
|
a1ff3cd5f9 | ||
|
|
9a32e80477 | ||
|
|
536a55dabd | ||
|
|
ed6fb8b804 | ||
|
|
3afef2e3fc | ||
|
|
e90d175436 | ||
|
|
7a93ab5f3f |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.06**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.13*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.13**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.04.06
|
||||
[debug] youtube-dl version 2016.04.13
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -13,6 +13,7 @@ README.txt
|
||||
youtube-dl.1
|
||||
youtube-dl.bash-completion
|
||||
youtube-dl.fish
|
||||
youtube_dl/extractor/lazy_extractors.py
|
||||
youtube-dl
|
||||
youtube-dl.exe
|
||||
youtube-dl.tar.gz
|
||||
|
||||
1
AUTHORS
1
AUTHORS
@@ -167,3 +167,4 @@ Kacper Michajłow
|
||||
José Joaquín Atria
|
||||
Viťas Strádal
|
||||
Kagami Hiiragi
|
||||
Philip Huppert
|
||||
|
||||
@@ -140,14 +140,14 @@ After you have ensured this site is distributing it's content legally, you can f
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerant** of situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be prepared for this key to be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
8
Makefile
8
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
@@ -88,6 +88,12 @@ youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
|
||||
|
||||
fish-completion: youtube-dl.fish
|
||||
|
||||
lazy-extractors: youtube_dl/extractor/lazy_extractors.py
|
||||
|
||||
_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py'
|
||||
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
|
||||
@@ -889,14 +889,14 @@ After you have ensured this site is distributing it's content legally, you can f
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerant** of situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be prepared for this key to be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
19
devscripts/lazy_load_template.py
Normal file
19
devscripts/lazy_load_template.py
Normal file
@@ -0,0 +1,19 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class LazyLoadExtractor(object):
    """Lightweight stand-in for an extractor class that is imported on demand.

    Generated subclasses set ``_module`` to the dotted path of the module
    that defines the real class of the same name.  Instantiating a subclass
    imports that module and returns an instance of the real class instead.

    NOTE: devscripts/make_lazy_extractors.py reads this file as text and
    appends the (indented) source of ``InfoExtractor.suitable`` directly
    after it, so this class body must remain the last thing in the file.
    """

    # Dotted module path of the real extractor class; filled in by subclasses.
    _module = None

    @classmethod
    def ie_key(cls):
        """Return the class name without its last two characters.

        The generator names every lazy class ``<ie_key>IE``, so this strips
        that ``IE`` suffix again.
        """
        return cls.__name__[:-2]

    def __new__(cls, *args, **kwargs):
        """Import the real class and return an initialised instance of it."""
        mod = __import__(cls._module, fromlist=(cls.__name__,))
        real_cls = getattr(mod, cls.__name__)
        # The object returned here is not an instance of ``cls``, so Python
        # will not call __init__ on it automatically; do it explicitly.
        instance = real_cls.__new__(real_cls)
        instance.__init__(*args, **kwargs)
        return instance
|
||||
63
devscripts/make_lazy_extractors.py
Normal file
63
devscripts/make_lazy_extractors.py
Normal file
@@ -0,0 +1,63 @@
|
||||
from __future__ import unicode_literals, print_function
|
||||
|
||||
from inspect import getsource
|
||||
import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
||||
print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)

# Put the directory two levels above this script first on sys.path so that
# ``youtube_dl`` is imported from this checkout.
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))

# Path of the module to generate, given as the only command-line argument.
lazy_extractors_filename = sys.argv[1]
# Remove any previously generated file *before* importing the extractor
# package below.
# NOTE(review): presumably so that the import picks up the real extractor
# classes rather than a stale lazy module - the extractor package's own
# changes are not visible here; confirm.
if os.path.exists(lazy_extractors_filename):
    os.remove(lazy_extractors_filename)

from youtube_dl.extractor import _ALL_CLASSES
from youtube_dl.extractor.common import InfoExtractor

# Locate the template next to this script instead of relying on the current
# working directory being the repository root.
with open(os.path.join(dirn(os.path.abspath(__file__)), 'lazy_load_template.py'), 'rt') as f:
    module_template = f.read()

# The source of InfoExtractor.suitable is appended right after the template
# text, i.e. directly behind the body of the template's class.
module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)]

# Source skeleton of one generated lazy extractor class.
ie_template = '''
class {name}(LazyLoadExtractor):
    _VALID_URL = {valid_url!r}
    _module = '{module}'
'''

# Extra method appended to classes that expose ``_make_valid_url``.
make_valid_template = '''
    @classmethod
    def _make_valid_url(cls):
        return {valid_url!r}
'''
||||
def build_lazy_ie(ie, name):
    """Return the source text of the lazy stand-in class for extractor ``ie``.

    ie   -- the real extractor class
    name -- the name to give the generated class

    The generated class records the extractor's ``_VALID_URL`` (``None`` when
    the attribute is absent) and the module that defines the real class.
    """
    valid_url = getattr(ie, '_VALID_URL', None)
    s = ie_template.format(
        name=name,
        valid_url=valid_url,
        module=ie.__module__)
    # Copy the extractor's own ``suitable`` only when its underlying function
    # differs from the one on InfoExtractor, i.e. when it was overridden.
    if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
        s += '\n' + getsource(ie.suitable)
    if hasattr(ie, '_make_valid_url'):
        # search extractors: the pattern is computed now and baked into the
        # generated source as a literal
        s += make_valid_template.format(valid_url=ie._make_valid_url())
    return s
|
||||
|
||||
# Generate one lazy class per extractor.  All classes except the last are
# ordered by ie_key(); the last entry of _ALL_CLASSES keeps its final position.
# NOTE(review): presumably the last class is a catch-all extractor that must
# be tried last - confirm against youtube_dl.extractor.
names = []
for ie in sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key()) + _ALL_CLASSES[-1:]:
    name = ie.ie_key() + 'IE'
    src = build_lazy_ie(ie, name)
    module_contents.append(src)
    names.append(name)

# The generated module exposes its classes under the name _ALL_CLASSES.
module_contents.append(
    '_ALL_CLASSES = [{0}]'.format(', '.join(names)))

module_src = '\n'.join(module_contents) + '\n'

with open(lazy_extractors_filename, 'wt') as f:
    f.write(module_src)
|
||||
@@ -115,6 +115,7 @@
|
||||
- **Cinemassacre**
|
||||
- **Clipfish**
|
||||
- **cliphunter**
|
||||
- **ClipRs**
|
||||
- **Clipsyndicate**
|
||||
- **cloudtime**: CloudTime
|
||||
- **Cloudy**
|
||||
@@ -286,7 +287,6 @@
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
- **Izlesene**
|
||||
- **JadoreCettePub**
|
||||
- **JeuxVideo**
|
||||
- **Jove**
|
||||
- **jpopsuki.tv**
|
||||
@@ -484,6 +484,7 @@
|
||||
- **Pornotube**
|
||||
- **PornoVoisines**
|
||||
- **PornoXO**
|
||||
- **PressTV**
|
||||
- **PrimeShareTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
|
||||
@@ -2,5 +2,5 @@
|
||||
universal = True
|
||||
|
||||
[flake8]
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/make_issue_template.py,setup.py,build,.git
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
|
||||
ignore = E402,E501,E731
|
||||
|
||||
22
setup.py
22
setup.py
@@ -8,11 +8,12 @@ import warnings
|
||||
import sys
|
||||
|
||||
try:
|
||||
from setuptools import setup
|
||||
from setuptools import setup, Command
|
||||
setuptools_available = True
|
||||
except ImportError:
|
||||
from distutils.core import setup
|
||||
from distutils.core import setup, Command
|
||||
setuptools_available = False
|
||||
from distutils.spawn import spawn
|
||||
|
||||
try:
|
||||
# This will create an exe that needs Microsoft Visual C++ 2008
|
||||
@@ -70,6 +71,22 @@ else:
|
||||
else:
|
||||
params['scripts'] = ['bin/youtube-dl']
|
||||
|
||||
class build_lazy_extractors(Command):
    """setup.py command that generates the extractor lazy loading module.

    Runs devscripts/make_lazy_extractors.py with the current interpreter and
    has it write youtube_dl/extractor/lazy_extractors.py.  Both paths are
    relative, so the command is expected to be run from the repository root.
    """

    description = "Build the extractor lazy loading module"
    # The command takes no options of its own.
    user_options = []

    def initialize_options(self):
        """Nothing to initialise: the command has no options."""
        pass

    def finalize_options(self):
        """Nothing to finalise: the command has no options."""
        pass

    def run(self):
        """Spawn the generator script, honouring the --dry-run flag."""
        spawn(
            [sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'],
            dry_run=self.dry_run,
        )
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
'youtube_dl/version.py', 'exec'))
|
||||
@@ -107,5 +124,6 @@ setup(
|
||||
"Programming Language :: Python :: 3.4",
|
||||
],
|
||||
|
||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||
**params
|
||||
)
|
||||
|
||||
@@ -143,6 +143,9 @@ def expect_value(self, got, expected, field):
|
||||
expect_value(self, item_got, item_expected, field)
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
self.assertTrue(
|
||||
isinstance(got, compat_str),
|
||||
'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
|
||||
got = 'md5:' + md5(got)
|
||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||
self.assertTrue(
|
||||
|
||||
@@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
@@ -66,5 +67,14 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._html_search_meta('e', html), '5')
|
||||
self.assertEqual(ie._html_search_meta('f', html), '6')
|
||||
|
||||
def test_download_json(self):
|
||||
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
|
||||
self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
|
||||
uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript')
|
||||
self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'})
|
||||
uri = encode_data_uri(b'{"foo": invalid}', 'application/json')
|
||||
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
|
||||
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -20,6 +20,7 @@ from youtube_dl.utils import (
|
||||
args_to_str,
|
||||
encode_base_n,
|
||||
clean_html,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
determine_ext,
|
||||
@@ -234,6 +235,13 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unescapeHTML('é'), 'é')
|
||||
self.assertEqual(unescapeHTML('�'), '�')
|
||||
|
||||
def test_date_from_str(self):
|
||||
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
|
||||
self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
|
||||
self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
|
||||
self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
|
||||
self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
|
||||
|
||||
def test_daterange(self):
|
||||
_20century = DateRange("19000101", "20000101")
|
||||
self.assertFalse("17890714" in _20century)
|
||||
|
||||
@@ -82,7 +82,7 @@ from .utils import (
|
||||
YoutubeDLHandler,
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||
from .downloader import get_suitable_downloader
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .postprocessor import (
|
||||
@@ -378,8 +378,9 @@ class YoutubeDL(object):
|
||||
def add_info_extractor(self, ie):
|
||||
"""Add an InfoExtractor object to the end of the list."""
|
||||
self._ies.append(ie)
|
||||
self._ies_instances[ie.ie_key()] = ie
|
||||
ie.set_downloader(self)
|
||||
if not isinstance(ie, type):
|
||||
self._ies_instances[ie.ie_key()] = ie
|
||||
ie.set_downloader(self)
|
||||
|
||||
def get_info_extractor(self, ie_key):
|
||||
"""
|
||||
@@ -397,7 +398,7 @@ class YoutubeDL(object):
|
||||
"""
|
||||
Add the InfoExtractors returned by gen_extractors to the end of the list
|
||||
"""
|
||||
for ie in gen_extractors():
|
||||
for ie in gen_extractor_classes():
|
||||
self.add_info_extractor(ie)
|
||||
|
||||
def add_post_processor(self, pp):
|
||||
@@ -661,6 +662,7 @@ class YoutubeDL(object):
|
||||
if not ie.suitable(url):
|
||||
continue
|
||||
|
||||
ie = self.get_info_extractor(ie.ie_key())
|
||||
if not ie.working():
|
||||
self.report_warning('The program functionality for this site has been marked as broken, '
|
||||
'and will probably not work.')
|
||||
@@ -1240,7 +1242,10 @@ class YoutubeDL(object):
|
||||
self.list_thumbnails(info_dict)
|
||||
return
|
||||
|
||||
if thumbnails and 'thumbnail' not in info_dict:
|
||||
thumbnail = info_dict.get('thumbnail')
|
||||
if thumbnail:
|
||||
info_dict['thumbnail'] = sanitize_url(thumbnail)
|
||||
elif thumbnails:
|
||||
info_dict['thumbnail'] = thumbnails[-1]['url']
|
||||
|
||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||
@@ -1954,6 +1959,8 @@ class YoutubeDL(object):
|
||||
write_string(encoding_str, encoding=None)
|
||||
|
||||
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
||||
if _LAZY_LOADER:
|
||||
self._write_string('[debug] Lazy loading extractors enabled' + '\n')
|
||||
try:
|
||||
sp = subprocess.Popen(
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,10 +2,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
)
|
||||
|
||||
|
||||
class ACastIE(InfoExtractor):
|
||||
@@ -26,13 +30,8 @@ class ACastIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
embed_page = self._download_webpage(
|
||||
re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id)
|
||||
cast_data = self._parse_json(self._search_regex(
|
||||
r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'),
|
||||
display_id)['GetAcast/%s/%s' % (channel, display_id)]
|
||||
|
||||
cast_data = self._download_json(
|
||||
'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id)
|
||||
return {
|
||||
'id': compat_str(cast_data['id']),
|
||||
'display_id': display_id,
|
||||
@@ -58,15 +57,26 @@ class ACastChannelIE(InfoExtractor):
|
||||
'playlist_mincount': 20,
|
||||
}
|
||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id)
|
||||
casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id)
|
||||
entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts]
|
||||
def _fetch_page(self, channel_slug, page):
|
||||
casts = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
|
||||
channel_slug, note='Download page %d of channel data' % page)
|
||||
for cast in casts:
|
||||
yield self.url_result(
|
||||
'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
|
||||
'ACast', cast['id'])
|
||||
|
||||
return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description'))
|
||||
def _real_extract(self, url):
|
||||
channel_slug = self._match_id(url)
|
||||
channel_data = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, channel_slug), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, compat_str(
|
||||
channel_data['id']), channel_data['name'], channel_data.get('description'))
|
||||
|
||||
@@ -1,11 +1,18 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AolIE(InfoExtractor):
|
||||
IE_NAME = 'on.aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
|
||||
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[^/?-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
|
||||
@@ -14,13 +21,79 @@ class AolIE(InfoExtractor):
|
||||
'id': '518167793',
|
||||
'ext': 'mp4',
|
||||
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
|
||||
'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.',
|
||||
'timestamp': 1395405060,
|
||||
'upload_date': '20140321',
|
||||
'uploader': 'Newsy Studio',
|
||||
},
|
||||
'add_ie': ['FiveMin'],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183',
|
||||
'info_dict': {
|
||||
'id': '5707d6b8e4b090497b04f706',
|
||||
'ext': 'mp4',
|
||||
'title': 'Netflix is Raising Rates',
|
||||
'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.',
|
||||
'upload_date': '20160408',
|
||||
'timestamp': 1460123280,
|
||||
'uploader': 'Veuer',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result('5min:%s' % video_id)
|
||||
|
||||
response = self._download_json(
|
||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||
video_id)['response']
|
||||
if response['statusText'] != 'Ok':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
|
||||
|
||||
video_data = response['data']
|
||||
formats = []
|
||||
m3u8_url = video_data.get('videoMasterPlaylist')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
for rendition in video_data.get('renditions', []):
|
||||
video_url = rendition.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
ext = rendition.get('format')
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': rendition.get('quality'),
|
||||
}
|
||||
mobj = re.search(r'(\d+)x(\d+)', video_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'width': int(mobj.group(1)),
|
||||
'height': int(mobj.group(2)),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'],
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('publishDate')),
|
||||
'view_count': int_or_none(video_data.get('views')),
|
||||
'description': video_data.get('description'),
|
||||
'uploader': video_data.get('videoOwner'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class AolFeaturesIE(InfoExtractor):
|
||||
|
||||
@@ -83,7 +83,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
subtitle_url = media_info.get('_subtitleUrl')
|
||||
if subtitle_url:
|
||||
subtitles['de'] = [{
|
||||
'ext': 'srt',
|
||||
'ext': 'ttml',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
|
||||
|
||||
@@ -337,7 +337,7 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:embed'
|
||||
_VALID_URL = r'''(?x)
|
||||
http://www\.arte\.tv
|
||||
/playerv2/embed\.php\?json_url=
|
||||
/(?:playerv2/embed|arte_vp/index)\.php\?json_url=
|
||||
(?P<json_url>
|
||||
http://arte\.tv/papi/tvguide/videos/stream/player/
|
||||
(?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
|
||||
|
||||
@@ -33,8 +33,33 @@ class BeegIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cpl_url = self._search_regex(
|
||||
r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
|
||||
webpage, 'cpl', default=None, group='url')
|
||||
|
||||
beeg_version, beeg_salt = [None] * 2
|
||||
|
||||
if cpl_url:
|
||||
cpl = self._download_webpage(
|
||||
self._proto_relative_url(cpl_url), video_id,
|
||||
'Downloading cpl JS', fatal=False)
|
||||
if cpl:
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*(\d+)', cpl,
|
||||
'beeg version', default=None) or self._search_regex(
|
||||
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
|
||||
beeg_salt = self._search_regex(
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg beeg_salt',
|
||||
default=None, group='beeg_salt')
|
||||
|
||||
beeg_version = beeg_version or '1750'
|
||||
beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr'
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.beeg.com/api/v6/1738/video/%s' % video_id, video_id)
|
||||
'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
|
||||
video_id)
|
||||
|
||||
def split(o, e):
|
||||
def cut(s, x):
|
||||
@@ -51,7 +76,7 @@ class BeegIE(InfoExtractor):
|
||||
|
||||
def decrypt_key(key):
|
||||
# Reverse engineered from http://static.beeg.com/cpl/1738.js
|
||||
a = 'GUuyodcfS8FW8gQp4OKLMsZBcX0T7B'
|
||||
a = beeg_salt
|
||||
e = compat_urllib_parse_unquote(key)
|
||||
o = ''.join([
|
||||
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
|
||||
@@ -101,5 +126,5 @@ class BeegIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
}
|
||||
|
||||
90
youtube_dl/extractor/cliprs.py
Normal file
90
youtube_dl/extractor/cliprs.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ClipRsIE(InfoExtractor):
    """Information extractor for video pages on clip.rs.

    The web page embeds a player element whose id has the form
    ``mvp:<media id>``.  That media id is sent to the onetapi.pl endpoint
    (a JSON-RPC 2.0 style ``get_asset_detail`` call encoded in the query
    string) which answers with the available formats and metadata.
    """

    _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
    _TEST = {
        'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
        'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
        'info_dict': {
            'id': '1488842.1399140381',
            'ext': 'mp4',
            'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli',
            'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026',
            'duration': 229,
            'timestamp': 1459850243,
            'upload_date': '20160405',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clip.rs page at *url*.

        Raises ExtractorError when the API reports an error or returns no
        video entry.
        """
        # Until the real media id is known, the URL slug matched by
        # _VALID_URL is used as the id for the page download.
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # From here on video_id is the player's media id, not the URL slug.
        video_id = self._search_regex(
            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')

        response = self._download_json(
            'http://qi.ckm.onetapi.pl/', video_id,
            query={
                'body[id]': video_id,
                'body[jsonrpc]': '2.0',
                'body[method]': 'get_asset_detail',
                'body[params][ID_Publikacji]': video_id,
                'body[params][Service]': 'www.onet.pl',
                'content-type': 'application/jsonp',
                'x-onet-app': 'player.front.onetapi.pl',
            })

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)

        video = response['result'].get('0')
        if not video:
            # Without this guard the lookup below would fail with an opaque
            # TypeError on None.
            raise ExtractorError(
                '%s returned no video data' % self.IE_NAME)

        formats = []
        # 'formats' maps a group key to a dict of {format_id: [entries]};
        # only the nested dicts/lists are of interest, anything else is
        # skipped.
        for formats_dict in video['formats'].values():
            if not isinstance(formats_dict, dict):
                continue
            for format_id, format_list in formats_dict.items():
                if not isinstance(format_list, list):
                    continue
                for f in format_list:
                    if not f.get('url'):
                        continue
                    formats.append({
                        'url': f['url'],
                        'format_id': format_id,
                        'height': int_or_none(f.get('vertical_resolution')),
                        'width': int_or_none(f.get('horizontal_resolution')),
                        'abr': float_or_none(f.get('audio_bitrate')),
                        'vbr': float_or_none(f.get('video_bitrate')),
                    })
        self._sort_formats(formats)

        meta = video.get('meta', {})

        # Prefer the page's OpenGraph data and fall back to API metadata.
        title = self._og_search_title(webpage, default=None) or meta['title']
        description = self._og_search_description(webpage, default=None) or meta.get('description')
        # The misspelled 'lenght' key is looked up on purpose as a fallback;
        # the value is normalised so a numeric string becomes an integer.
        duration = int_or_none(meta.get('length') or meta.get('lenght'))
        timestamp = parse_iso8601(meta.get('addDate'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
|
||||
@@ -232,6 +232,24 @@ class InfoExtractor(object):
|
||||
episode_number: Number of the video episode within a season, as an integer.
|
||||
episode_id: Id of the video episode, as a unicode string.
|
||||
|
||||
The following fields should only be used when the media is a track or a part of
|
||||
a music album:
|
||||
|
||||
track: Title of the track.
|
||||
track_number: Number of the track within an album or a disc, as an integer.
|
||||
track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
|
||||
as a unicode string.
|
||||
artist: Artist(s) of the track.
|
||||
genre: Genre(s) of the track.
|
||||
album: Title of the album the track belongs to.
|
||||
album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
|
||||
album_artist: List of all artists that appeared on the album (e.g.
|
||||
"Ash Borer / Fell Voices" or "Various Artists", useful for splits
|
||||
and compilations).
|
||||
disc_number: Number of the disc or other physical medium the track belongs to,
|
||||
as an integer.
|
||||
release_year: Year (YYYY) when the album was released.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
Unless mentioned otherwise, None is equivalent to absence of information.
|
||||
@@ -358,7 +376,6 @@ class InfoExtractor(object):
|
||||
self.to_screen('%s' % (note,))
|
||||
else:
|
||||
self.to_screen('%s: %s' % (video_id, note))
|
||||
# data, headers and query params will be ignored for `Request` objects
|
||||
if isinstance(url_or_request, compat_urllib_request.Request):
|
||||
url_or_request = update_Request(
|
||||
url_or_request, data=data, headers=headers, query=query)
|
||||
@@ -825,7 +842,7 @@ class InfoExtractor(object):
|
||||
for input in re.findall(r'(?i)<input([^>]+)>', html):
|
||||
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
|
||||
continue
|
||||
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
|
||||
name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input)
|
||||
if not name:
|
||||
continue
|
||||
value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
|
||||
@@ -1516,7 +1533,7 @@ class InfoExtractor(object):
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)%(\d+)\$', r'%(\1)\2d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
|
||||
@@ -17,37 +17,53 @@ class DemocracynowIE(InfoExtractor):
|
||||
IE_NAME = 'democracynow'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.democracynow.org/shows/2015/7/3',
|
||||
'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
|
||||
'md5': '3757c182d3d84da68f5c8f506c18c196',
|
||||
'info_dict': {
|
||||
'id': '2015-0703-001',
|
||||
'ext': 'mp4',
|
||||
'title': 'July 03, 2015 - Democracy Now!',
|
||||
'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
|
||||
'title': 'Daily Show',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
|
||||
'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
|
||||
'info_dict': {
|
||||
'id': '2015-0703-001',
|
||||
'ext': 'mp4',
|
||||
'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
|
||||
'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
json_data = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
|
||||
display_id)
|
||||
video_id = None
|
||||
|
||||
title = json_data['title']
|
||||
formats = []
|
||||
|
||||
default_lang = 'en'
|
||||
video_id = None
|
||||
|
||||
for key in ('file', 'audio', 'video', 'high_res_video'):
|
||||
media_url = json_data.get(key, '')
|
||||
if not media_url:
|
||||
continue
|
||||
media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
|
||||
video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'vcodec': 'none' if key == 'audio' else None,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
default_lang = 'en'
|
||||
subtitles = {}
|
||||
|
||||
def add_subtitle_item(lang, info_dict):
|
||||
@@ -67,22 +83,13 @@ class DemocracynowIE(InfoExtractor):
|
||||
'url': compat_urlparse.urljoin(url, subtitle_item['url']),
|
||||
})
|
||||
|
||||
for key in ('file', 'audio', 'video'):
|
||||
media_url = json_data.get(key, '')
|
||||
if not media_url:
|
||||
continue
|
||||
media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
|
||||
video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
|
||||
return {
|
||||
'id': video_id or display_id,
|
||||
'title': json_data['title'],
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': json_data.get('image'),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -4,10 +4,10 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class EbaumsWorldIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
|
||||
'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/',
|
||||
'info_dict': {
|
||||
'id': '83367677',
|
||||
'ext': 'mp4',
|
||||
|
||||
992
youtube_dl/extractor/extractors.py
Normal file
992
youtube_dl/extractor/extractors.py
Normal file
@@ -0,0 +1,992 @@
|
||||
# flake8: noqa
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .abc import ABCIE
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .acast import (
|
||||
ACastIE,
|
||||
ACastChannelIE,
|
||||
)
|
||||
from .addanime import AddAnimeIE
|
||||
from .adobetv import (
|
||||
AdobeTVIE,
|
||||
AdobeTVShowIE,
|
||||
AdobeTVChannelIE,
|
||||
AdobeTVVideoIE,
|
||||
)
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aenetworks import AENetworksIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import (
|
||||
AolIE,
|
||||
AolFeaturesIE,
|
||||
)
|
||||
from .allocine import AllocineIE
|
||||
from .aparat import AparatIE
|
||||
from .appleconnect import AppleConnectIE
|
||||
from .appletrailers import (
|
||||
AppleTrailersIE,
|
||||
AppleTrailersSectionIE,
|
||||
)
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .ard import (
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
SportschauIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTvIE,
|
||||
ArteTVPlus7IE,
|
||||
ArteTVCreativeIE,
|
||||
ArteTVConcertIE,
|
||||
ArteTVFutureIE,
|
||||
ArteTVCinemaIE,
|
||||
ArteTVDDCIE,
|
||||
ArteTVMagazineIE,
|
||||
ArteTVEmbedIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .audimedia import AudiMediaIE
|
||||
from .audioboom import AudioBoomIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCCoUkArticleIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .beatportpro import BeatportProIE
|
||||
from .bet import BetIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
BleacherReportCMSIE,
|
||||
)
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bpb import BpbIE
|
||||
from .br import BRIE
|
||||
from .bravotv import BravoTVIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
BrightcoveNewIE,
|
||||
)
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
from .camdemy import (
|
||||
CamdemyIE,
|
||||
CamdemyFolderIE
|
||||
)
|
||||
from .camwithher import CamWithHerIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import CanvasIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .cda import CDAIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chaturbate import ChaturbateIE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .chirbit import (
|
||||
ChirbitIE,
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnbc import CNBCIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNBlogsIE,
|
||||
CNNArticleIE,
|
||||
)
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import RtmpIE
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .criterion import CriterionIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
from .crunchyroll import (
|
||||
CrunchyrollIE,
|
||||
CrunchyrollShowPlaylistIE
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .cwtv import CWTVIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
DailymotionUserIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
from .daum import (
|
||||
DaumIE,
|
||||
DaumClipIE,
|
||||
DaumPlaylistIE,
|
||||
DaumUserIE,
|
||||
)
|
||||
from .dbtv import DBTVIE
|
||||
from .dcn import (
|
||||
DCNIE,
|
||||
DCNVideoIE,
|
||||
DCNLiveIE,
|
||||
DCNSeasonIE,
|
||||
)
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .democracynow import DemocracynowIE
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import DouyuTVIE
|
||||
from .dplay import DPlayIE
|
||||
from .dramafever import (
|
||||
DramaFeverIE,
|
||||
DramaFeverSeriesIE,
|
||||
)
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import DRTVIE
|
||||
from .dvtv import DVTVIE
|
||||
from .dump import DumpIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .dropbox import DropboxIE
|
||||
from .dw import (
|
||||
DWIE,
|
||||
DWArticleIE,
|
||||
)
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .echomsk import EchoMskIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .ellentv import (
|
||||
EllenTVIE,
|
||||
EllenTVClipsIE,
|
||||
)
|
||||
from .elpais import ElPaisIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
from .escapist import EscapistIE
|
||||
from .espn import ESPNIE
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .fczenit import FczenitIE
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .fourtube import FourTubeIE
|
||||
from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import (
|
||||
FranceCultureIE,
|
||||
FranceCultureEmissionIE,
|
||||
)
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
PluzzIE,
|
||||
FranceTvInfoIE,
|
||||
FranceTVIE,
|
||||
GenerationQuoiIE,
|
||||
CultureboxIE,
|
||||
)
|
||||
from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freevideo import FreeVideoIE
|
||||
from .funimation import FunimationIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gameone import (
|
||||
GameOneIE,
|
||||
GameOnePlaylistIE,
|
||||
)
|
||||
from .gamersyde import GamersydeIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
from .globo import (
|
||||
GloboIE,
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .godtube import GodTubeIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
from .hbo import HBOIE
|
||||
from .hearthisat import HearThisAtIE
|
||||
from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .hotstar import HotStarIE
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
from .ign import (
|
||||
IGNIE,
|
||||
OneUPIE,
|
||||
PCMagIE,
|
||||
)
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
ImdbListIE
|
||||
)
|
||||
from .imgur import (
|
||||
ImgurIE,
|
||||
ImgurAlbumIE,
|
||||
)
|
||||
from .ina import InaIE
|
||||
from .indavideo import (
|
||||
IndavideoIE,
|
||||
IndavideoEmbedIE,
|
||||
)
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
from .ir90tv import Ir90TvIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
from .ivideon import IvideonIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kusi import KUSIIE
|
||||
from .kuwo import (
|
||||
KuwoIE,
|
||||
KuwoAlbumIE,
|
||||
KuwoChartIE,
|
||||
KuwoSingerIE,
|
||||
KuwoCategoryIE,
|
||||
KuwoMvIE,
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
LePlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import (
|
||||
LifeNewsIE,
|
||||
LifeEmbedIE,
|
||||
)
|
||||
from .limelight import (
|
||||
LimelightMediaIE,
|
||||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import (
|
||||
LivestreamIE,
|
||||
LivestreamOriginalIE,
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .lnkgo import LnkGoIE
|
||||
from .lovehomeporn import LoveHomePornIE
|
||||
from .lrt import LRTIE
|
||||
from .lynda import (
|
||||
LyndaIE,
|
||||
LyndaCourseIE
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .minhateca import MinhatecaIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mlb import MLBIE
|
||||
from .mnet import MnetIE
|
||||
from .mpora import MporaIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .moniker import MonikerIE
|
||||
from .mooshare import MooshareIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVIggyIE,
|
||||
MTVDEIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .mwave import MwaveIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvi import MyviIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicIE,
|
||||
NationalGeographicChannelIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import (
|
||||
CSNNEIE,
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
MSNBCIE,
|
||||
)
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
NJoyIE,
|
||||
NDREmbedBaseIE,
|
||||
NDREmbedIE,
|
||||
NJoyEmbedIE,
|
||||
)
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .nerdist import NerdistIE
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicIE,
|
||||
NetEaseMusicAlbumIE,
|
||||
NetEaseMusicSingerIE,
|
||||
NetEaseMusicListIE,
|
||||
NetEaseMusicMvIE,
|
||||
NetEaseMusicProgramIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
)
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nextmedia import (
|
||||
NextMediaIE,
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyIE,
|
||||
)
|
||||
from .nextmovie import NextMovieIE
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import (
|
||||
NHLIE,
|
||||
NHLNewsIE,
|
||||
NHLVideocenterIE,
|
||||
)
|
||||
from .nick import NickIE
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninegag import NineGagIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
from .novamov import (
|
||||
AuroraVidIE,
|
||||
CloudTimeIE,
|
||||
NowVideoIE,
|
||||
VideoWeedIE,
|
||||
WholeCloudIE,
|
||||
)
|
||||
from .nowness import (
|
||||
NownessIE,
|
||||
NownessPlaylistIE,
|
||||
NownessSeriesIE,
|
||||
)
|
||||
from .nowtv import (
|
||||
NowTVIE,
|
||||
NowTVListIE,
|
||||
)
|
||||
from .noz import NozIE
|
||||
from .npo import (
|
||||
NPOIE,
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
SchoolTVIE,
|
||||
VPROIE,
|
||||
WNLIE
|
||||
)
|
||||
from .npr import NprIE
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKPlaylistIE,
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
from .nytimes import (
|
||||
NYTimesIE,
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .ooyala import (
|
||||
OoyalaIE,
|
||||
OoyalaExternalIE,
|
||||
)
|
||||
from .openload import OpenloadIE
|
||||
from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFOE1IE,
|
||||
ORFFM4IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .periscope import PeriscopeIE
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
from .pluralsight import (
|
||||
PluralsightIE,
|
||||
PluralsightCourseIE,
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
PornHubPlaylistIE,
|
||||
PornHubUserVideosIE,
|
||||
)
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .presstv import PressTVIE
|
||||
from .primesharetv import PrimeShareTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .puls4 import Puls4IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .qqmusic import (
|
||||
QQMusicIE,
|
||||
QQMusicSingerIE,
|
||||
QQMusicAlbumIE,
|
||||
QQMusicToplistIE,
|
||||
QQMusicPlaylistIE,
|
||||
)
|
||||
from .quickvid import QuickVidIE
|
||||
from .r7 import R7IE
|
||||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import (
|
||||
RaiTVIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .restudy import RestudyIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .revision3 import Revision3IE
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE, RteRadioIE
|
||||
from .rtlnl import RtlNlIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .ruhd import RUHDIE
|
||||
from .ruleporn import RulePornIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
RutubeChannelIE,
|
||||
RutubeEmbedIE,
|
||||
RutubeMovieIE,
|
||||
RutubePersonIE,
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .ruutu import RuutuIE
|
||||
from .sandia import SandiaIE
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
SafariApiIE,
|
||||
SafariCourseIE,
|
||||
)
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenjunkies import ScreenJunkiesIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .sexykarma import SexyKarmaIE
|
||||
from .shahid import ShahidIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
from .skynewsarabia import (
|
||||
SkyNewsArabiaIE,
|
||||
SkyNewsArabiaArticleIE,
|
||||
)
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
SmotriCommunityIE,
|
||||
SmotriUserIE,
|
||||
SmotriBroadcastIE,
|
||||
)
|
||||
from .snagfilms import (
|
||||
SnagFilmsIE,
|
||||
SnagFilmsEmbedIE,
|
||||
)
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .soundcloud import (
|
||||
SoundcloudIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
SoundcloudPlaylistIE,
|
||||
SoundcloudSearchIE
|
||||
)
|
||||
from .soundgasm import (
|
||||
SoundgasmIE,
|
||||
SoundgasmProfileIE
|
||||
)
|
||||
from .southpark import (
|
||||
SouthParkIE,
|
||||
SouthParkDeIE,
|
||||
SouthParkDkIE,
|
||||
SouthParkEsIE,
|
||||
SouthParkNlIE
|
||||
)
|
||||
from .spankbang import SpankBangIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import (
|
||||
SportBoxIE,
|
||||
SportBoxEmbedIE,
|
||||
)
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
SRGSSRPlayIE,
|
||||
)
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPlayIE,
|
||||
)
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
)
|
||||
from .teachingchannel import TeachingChannelIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tele13 import Tele13IE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .theintercept import TheInterceptIE
|
||||
from .theonion import TheOnionIE
|
||||
from .theplatform import (
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
)
|
||||
from .thescene import TheSceneIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcDeIE
|
||||
from .tmz import (
|
||||
TMZIE,
|
||||
TMZArticleIE,
|
||||
)
|
||||
from .tnaflix import (
|
||||
TNAFlixNetworkEmbedIE,
|
||||
TNAFlixIE,
|
||||
EMPFlixIE,
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
THVideoPlaylistIE
|
||||
)
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trollvids import TrollvidsIE
|
||||
from .trutube import TruTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
from .tudou import (
|
||||
TudouIE,
|
||||
TudouPlaylistIE,
|
||||
TudouAlbumIE,
|
||||
)
|
||||
from .tumblr import TumblrIE
|
||||
from .tunein import (
|
||||
TuneInClipIE,
|
||||
TuneInStationIE,
|
||||
TuneInProgramIE,
|
||||
TuneInTopicIE,
|
||||
TuneInShortenerIE,
|
||||
)
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tvc import (
|
||||
TVCIE,
|
||||
TVCArticleIE,
|
||||
)
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentytwotracks import (
|
||||
TwentyTwoTracksIE,
|
||||
TwentyTwoTracksGenreIE
|
||||
)
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
TwitchVodIE,
|
||||
TwitchProfileIE,
|
||||
TwitchPastBroadcastsIE,
|
||||
TwitchBookmarksIE,
|
||||
TwitchStreamIE,
|
||||
)
|
||||
from .twitter import (
|
||||
TwitterCardIE,
|
||||
TwitterIE,
|
||||
TwitterAmplifyIE,
|
||||
)
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .udn import UDNEmbedIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .usatoday import USATodayIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .ustudio import UstudioIE
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vessel import VesselIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vgtv import (
|
||||
BTArticleIE,
|
||||
BTVestlendingenIE,
|
||||
VGTVIE,
|
||||
)
|
||||
from .vh1 import VH1IE
|
||||
from .vice import (
|
||||
ViceIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videomega import VideoMegaIE
|
||||
from .videomore import (
|
||||
VideomoreIE,
|
||||
VideomoreVideoIE,
|
||||
VideomoreSeasonIE,
|
||||
)
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
VidmeUserIE,
|
||||
VidmeUserLikesIE,
|
||||
)
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewster import ViewsterIE
|
||||
from .viidea import ViideaIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoChannelIE,
|
||||
VimeoGroupsIE,
|
||||
VimeoLikesIE,
|
||||
VimeoOndemandIE,
|
||||
VimeoReviewIE,
|
||||
VimeoUserIE,
|
||||
VimeoWatchLaterIE,
|
||||
)
|
||||
from .vimple import VimpleIE
|
||||
from .vine import (
|
||||
VineIE,
|
||||
VineUserIE,
|
||||
)
|
||||
from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .vk import (
|
||||
VKIE,
|
||||
VKUserVideosIE,
|
||||
)
|
||||
from .vlive import VLiveIE
|
||||
from .vodlocker import VodlockerIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voxmedia import VoxMediaIE
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .wayofthemaster import WayOfTheMasterIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
WDRMausIE,
|
||||
)
|
||||
from .webofstories import (
|
||||
WebOfStoriesIE,
|
||||
WebOfStoriesPlaylistIE,
|
||||
)
|
||||
from .weibo import WeiboIE
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wrzuta import WrzutaIE
|
||||
from .wsj import WSJIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
)
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
from .xuite import XuiteIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xxxymovies import XXXYMoviesIE
|
||||
from .yahoo import (
|
||||
YahooIE,
|
||||
YahooSearchIE,
|
||||
)
|
||||
from .yam import YamIE
|
||||
from .yandexmusic import (
|
||||
YandexMusicTrackIE,
|
||||
YandexMusicAlbumIE,
|
||||
YandexMusicPlaylistIE,
|
||||
)
|
||||
from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
from .youporn import YouPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeHistoryIE,
|
||||
YoutubeLiveIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubePlaylistsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
ZingMp3AlbumIE,
|
||||
)
|
||||
from .zippcast import ZippCastIE
|
||||
@@ -2,78 +2,133 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..compat import compat_xpath
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
xpath_attr,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
class FirstTVIE(InfoExtractor):
|
||||
IE_NAME = '1tv'
|
||||
IE_DESC = 'Первый канал'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.1tv.ru/videoarchive/73390',
|
||||
'md5': '777f525feeec4806130f4f764bc18a4f',
|
||||
'info_dict': {
|
||||
'id': '73390',
|
||||
'ext': 'mp4',
|
||||
'title': 'Олимпийские канатные дороги',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'duration': 149,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}, {
|
||||
# single format via video_materials.json API
|
||||
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
|
||||
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
|
||||
'md5': '82a2777648acae812d58b3f5bd42882b',
|
||||
'info_dict': {
|
||||
'id': '35930',
|
||||
'ext': 'mp4',
|
||||
'title': 'Наедине со всеми. Людмила Сенчина',
|
||||
'description': 'md5:89553aed1d641416001fe8d450f06cb9',
|
||||
'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
|
||||
'description': 'md5:357933adeede13b202c7c21f91b871b2',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'upload_date': '20150212',
|
||||
'duration': 2694,
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}, {
|
||||
# multiple formats via video_materials.json API
|
||||
'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641',
|
||||
'info_dict': {
|
||||
'id': '113641',
|
||||
'ext': 'mp4',
|
||||
'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
|
||||
'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'upload_date': '20160407',
|
||||
'duration': 179,
|
||||
'formats': 'mincount:3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API
|
||||
'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038',
|
||||
'md5': '519d306c5b5669761fd8906c39dbee23',
|
||||
'info_dict': {
|
||||
'id': '47038',
|
||||
'ext': 'mp4',
|
||||
'title': '"Побег". Второй сезон. 3 серия',
|
||||
'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'upload_date': '20120516',
|
||||
'duration': 3080,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.1tv.ru/videoarchive/9967',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||
# Videos with multiple formats only available via this API
|
||||
video = self._download_json(
|
||||
'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id,
|
||||
video_id, fatal=False)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
|
||||
webpage, 'video URL')
|
||||
description, thumbnail, upload_date, duration = [None] * 4
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||
r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||
webpage, 'description', default=None) or self._html_search_meta(
|
||||
if video:
|
||||
item = video[0]
|
||||
title = item['title']
|
||||
quality = qualities(('ld', 'sd', 'hd', ))
|
||||
formats = [{
|
||||
'url': f['src'],
|
||||
'format_id': f.get('name'),
|
||||
'quality': quality(f.get('name')),
|
||||
} for f in item['mbr'] if f.get('src')]
|
||||
thumbnail = item.get('poster')
|
||||
else:
|
||||
# Some videos are not available via video_materials.json
|
||||
video = self._download_xml(
|
||||
'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id,
|
||||
video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
}
|
||||
|
||||
item = xpath_element(video, './channel/item', fatal=True)
|
||||
title = xpath_text(item, './title', fatal=True)
|
||||
formats = [{
|
||||
'url': content.attrib['url'],
|
||||
} for content in item.findall(
|
||||
compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')]
|
||||
thumbnail = xpath_attr(
|
||||
item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False)
|
||||
if webpage:
|
||||
title = self._html_search_regex(
|
||||
(r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||
r"'title'\s*:\s*'([^']+)'"),
|
||||
webpage, 'title', default=None) or title
|
||||
description = self._html_search_regex(
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||
webpage, 'description', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = self._og_search_property(
|
||||
'video:duration', webpage,
|
||||
'video duration', fatal=False)
|
||||
|
||||
like_count = self._html_search_regex(
|
||||
r'title="Понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'like count', default=None)
|
||||
dislike_count = self._html_search_regex(
|
||||
r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'dislike count', default=None)
|
||||
thumbnail = thumbnail or self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'video:duration', webpage, 'video duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'duration': int_or_none(duration),
|
||||
'like_count': int_or_none(like_count),
|
||||
'dislike_count': int_or_none(dislike_count),
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
@@ -46,8 +46,8 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
|
||||
webpage, 'm3u8 url', default=None, group='url')
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\1',
|
||||
webpage, 'm3u8 url', group='url')
|
||||
|
||||
formats = []
|
||||
|
||||
|
||||
@@ -159,9 +159,10 @@ class GDCVaultIE(InfoExtractor):
|
||||
'title': title,
|
||||
}
|
||||
|
||||
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/player.*?\.html.*?".*?</iframe>'
|
||||
|
||||
xml_root = self._html_search_regex(
|
||||
r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
|
||||
start_page, 'xml root', default=None)
|
||||
PLAYER_REGEX, start_page, 'xml root', default=None)
|
||||
if xml_root is None:
|
||||
# Probably need to authenticate
|
||||
login_res = self._login(webpage_url, display_id)
|
||||
@@ -171,18 +172,19 @@ class GDCVaultIE(InfoExtractor):
|
||||
start_page = login_res
|
||||
# Grab the url from the authenticated page
|
||||
xml_root = self._html_search_regex(
|
||||
r'<iframe src="(.*?)player.html.*?".*?</iframe>',
|
||||
start_page, 'xml root')
|
||||
PLAYER_REGEX, start_page, 'xml root')
|
||||
|
||||
xml_name = self._html_search_regex(
|
||||
r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
|
||||
start_page, 'xml filename', default=None)
|
||||
if xml_name is None:
|
||||
# Fallback to the older format
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
|
||||
xml_name = self._html_search_regex(
|
||||
r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
|
||||
start_page, 'xml filename')
|
||||
|
||||
xml_description_url = xml_root + 'xml/' + xml_name
|
||||
xml_description = self._download_xml(xml_description_url, display_id)
|
||||
xml_description = self._download_xml(
|
||||
'%s/xml/%s' % (xml_root, xml_name), display_id)
|
||||
|
||||
video_title = xml_description.find('./metadata/title').text
|
||||
video_formats = self._parse_mp4(xml_description)
|
||||
|
||||
@@ -1128,6 +1128,18 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Another form of arte.tv embed
|
||||
{
|
||||
'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
|
||||
'md5': '850bfe45417ddf221288c88a0cffe2e2',
|
||||
'info_dict': {
|
||||
'id': '030273-562_PLUS7-F',
|
||||
'ext': 'mp4',
|
||||
'title': 'ARTE Reportage - Nulle part, en France',
|
||||
'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
|
||||
'upload_date': '20160409',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -1702,7 +1714,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded arte.tv player
|
||||
mobj = re.search(
|
||||
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
|
||||
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class GlideIE(InfoExtractor):
|
||||
@@ -15,26 +16,38 @@ class GlideIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Damon Timm\'s Glide message',
|
||||
'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
|
||||
'uploader': 'Damon Timm',
|
||||
'upload_date': '20140919',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?)</title>', webpage, 'title')
|
||||
video_url = self.http_scheme() + self._search_regex(
|
||||
r'<source src="(.*?)" type="video/mp4">', webpage, 'video URL')
|
||||
thumbnail_url = self._search_regex(
|
||||
r'<img id="video-thumbnail" src="(.*?)"',
|
||||
webpage, 'thumbnail url', fatal=False)
|
||||
thumbnail = (
|
||||
thumbnail_url if thumbnail_url is None
|
||||
else self.http_scheme() + thumbnail_url)
|
||||
r'<title>(.+?)</title>', webpage, 'title')
|
||||
video_url = self._proto_relative_url(self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'video URL', default=None,
|
||||
group='url')) or self._og_search_video_url(webpage)
|
||||
thumbnail = self._proto_relative_url(self._search_regex(
|
||||
r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'thumbnail url', default=None,
|
||||
group='url')) or self._og_search_thumbnail(webpage)
|
||||
uploader = self._search_regex(
|
||||
r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<div[^>]+class="info-date"[^>]*>([^<]+)',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
@@ -14,13 +14,13 @@ class GoshgayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
|
||||
'md5': '027fcc54459dff0feb0bc06a7aeda680',
|
||||
'md5': '4b6db9a0a333142eb9f15913142b0ed1',
|
||||
'info_dict': {
|
||||
'id': '299069',
|
||||
'ext': 'flv',
|
||||
'title': 'DIESEL SFW XXX Video',
|
||||
'thumbnail': 're:^http://.*\.jpg$',
|
||||
'duration': 79,
|
||||
'duration': 80,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
@@ -47,5 +47,5 @@ class GoshgayIE(InfoExtractor):
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'age_limit': self._family_friendly_search(webpage),
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
@@ -16,14 +16,14 @@ class GrouponIE(InfoExtractor):
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Bikram Yoga Huntington Beach | Orange County',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'duration': 44.961,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': 'HLS',
|
||||
'skip_download': 'HDS',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ class GrouponIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
payload = self._parse_json(self._search_regex(
|
||||
r'var\s+payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
|
||||
r'(?:var\s+|window\.)payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
|
||||
videos = payload['carousel'].get('dealVideos', [])
|
||||
entries = []
|
||||
for v in videos:
|
||||
|
||||
@@ -24,6 +24,7 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 161,
|
||||
},
|
||||
'skip': 'Video broken',
|
||||
},
|
||||
{
|
||||
'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
|
||||
|
||||
@@ -1,93 +1,91 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
xpath_with_ns,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class InternetVideoArchiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
|
||||
_VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
|
||||
'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
|
||||
'info_dict': {
|
||||
'id': '452693',
|
||||
'id': '194487',
|
||||
'ext': 'mp4',
|
||||
'title': 'SKYFALL',
|
||||
'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
|
||||
'duration': 152,
|
||||
'title': 'KICK-ASS 2',
|
||||
'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _build_url(query):
|
||||
return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
|
||||
def _build_json_url(query):
|
||||
return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
|
||||
|
||||
@staticmethod
|
||||
def _clean_query(query):
|
||||
NEEDED_ARGS = ['publishedid', 'customerid']
|
||||
query_dic = compat_urlparse.parse_qs(query)
|
||||
cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
|
||||
# Other player ids return m3u8 urls
|
||||
cleaned_dic['playerid'] = '247'
|
||||
cleaned_dic['videokbrate'] = '100000'
|
||||
return compat_urllib_parse_urlencode(cleaned_dic)
|
||||
def _build_xml_url(query):
|
||||
return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
|
||||
|
||||
def _real_extract(self, url):
|
||||
query = compat_urlparse.urlparse(url).query
|
||||
query_dic = compat_urlparse.parse_qs(query)
|
||||
query_dic = compat_parse_qs(query)
|
||||
video_id = query_dic['publishedid'][0]
|
||||
url = self._build_url(query)
|
||||
|
||||
flashconfiguration = self._download_xml(url, video_id,
|
||||
'Downloading flash configuration')
|
||||
file_url = flashconfiguration.find('file').text
|
||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||
# Replace some of the parameters in the query to get the best quality
|
||||
# and http links (no m3u8 manifests)
|
||||
file_url = re.sub(r'(?<=\?)(.+)$',
|
||||
lambda m: self._clean_query(m.group()),
|
||||
file_url)
|
||||
info = self._download_xml(file_url, video_id,
|
||||
'Downloading video info')
|
||||
item = info.find('channel/item')
|
||||
if '/player/' in url:
|
||||
configuration = self._download_json(url, video_id)
|
||||
|
||||
def _bp(p):
|
||||
return xpath_with_ns(
|
||||
p,
|
||||
{
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
|
||||
}
|
||||
)
|
||||
formats = []
|
||||
for content in item.findall(_bp('media:group/media:content')):
|
||||
attr = content.attrib
|
||||
f_url = attr['url']
|
||||
width = int(attr['width'])
|
||||
bitrate = int(attr['bitrate'])
|
||||
format_id = '%d-%dk' % (width, bitrate)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': f_url,
|
||||
'width': width,
|
||||
'tbr': bitrate,
|
||||
})
|
||||
# There are multiple videos in the playlist while only the first one
|
||||
# matches the video played in browsers
|
||||
video_info = configuration['playlist'][0]
|
||||
|
||||
self._sort_formats(formats)
|
||||
formats = []
|
||||
for source in video_info['sources']:
|
||||
file_url = source['file']
|
||||
if determine_ext(file_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
else:
|
||||
a_format = {
|
||||
'url': file_url,
|
||||
}
|
||||
|
||||
if source.get('label') and source['label'][-4:] == ' kbs':
|
||||
tbr = int_or_none(source['label'][:-4])
|
||||
a_format.update({
|
||||
'tbr': tbr,
|
||||
'format_id': 'http-%d' % tbr,
|
||||
})
|
||||
formats.append(a_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video_info['title']
|
||||
description = video_info.get('description')
|
||||
thumbnail = video_info.get('image')
|
||||
else:
|
||||
configuration = self._download_xml(url, video_id)
|
||||
formats = [{
|
||||
'url': xpath_text(configuration, './file', 'file URL', fatal=True),
|
||||
}]
|
||||
thumbnail = xpath_text(configuration, './image', 'thumbnail')
|
||||
title = 'InternetVideoArchive video %s' % video_id
|
||||
description = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': item.find('title').text,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],
|
||||
'description': item.find('description').text,
|
||||
'duration': int(attr['duration']),
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ class IzleseneIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
|
||||
'description': 'md5:253753e2655dde93f59f74b572454f6d',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader_id': 'pelikzzle',
|
||||
'timestamp': int,
|
||||
'upload_date': '20140702',
|
||||
@@ -44,8 +44,7 @@ class IzleseneIE(InfoExtractor):
|
||||
'id': '17997',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tarkan Dortmund 2006 Konseri',
|
||||
'description': 'Tarkan Dortmund 2006 Konseri',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'thumbnail': 're:^https://.*\.jpg',
|
||||
'uploader_id': 'parlayankiz',
|
||||
'timestamp': int,
|
||||
'upload_date': '20061112',
|
||||
@@ -62,7 +61,7 @@ class IzleseneIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
thumbnail = self._proto_relative_url(
|
||||
self._og_search_thumbnail(webpage), scheme='http:')
|
||||
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class JadoreCettePubIE(InfoExtractor):
    """Extractor for blog posts on jadorecettepub.com.

    The post page supplies the title and description; the actual media is
    referenced by a URL embedded in the page, whose id is resolved through
    ``YoutubeIE.extract_id`` and which is returned as a ``url_transparent``
    result.
    """

    _VALID_URL = r'https?://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'

    _TEST = {
        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
        'md5': '401286a06067c70b44076044b66515de',
        'info_dict': {
            'id': 'jLMja3tr7a4',
            'ext': 'mp4',
            'title': 'La pire utilisation de Star Wars',
            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
        },
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` info dict for the post at *url*."""
        # The URL slug is only used as a display id while fetching the page;
        # the id in the result comes from the embedded video URL.
        display_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, display_id)

        # Keep the lookups in this order: title and video URL are mandatory,
        # the description is best-effort (fatal=False).
        post_title = self._html_search_regex(
            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
            page, 'title')
        post_description = self._html_search_regex(
            r'(?s)<div id="fb-root">(.*?)<script>', page, 'description',
            fatal=False)
        # The target URL sits between a '[/postlink]' marker and 'endofvid'.
        embedded_url = self._search_regex(
            r'\[/postlink\](.*)endofvid', page, 'video URL')

        result = {
            '_type': 'url_transparent',
            'url': embedded_url,
            'id': YoutubeIE.extract_id(embedded_url),
        }
        result['title'] = post_title
        result['description'] = post_description
        return result
|
||||
@@ -4,16 +4,15 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class JWPlatformBaseIE(InfoExtractor):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
|
||||
video_data = jwplayer_data['playlist'][0]
|
||||
subtitles = {}
|
||||
for track in video_data['tracks']:
|
||||
if track['kind'] == 'captions':
|
||||
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
@@ -35,12 +34,22 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if track.get('file') and track.get('kind') == 'captions':
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track['file'])
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
NO_DEFAULT,
|
||||
parse_count,
|
||||
str_to_int,
|
||||
)
|
||||
@@ -63,8 +64,17 @@ class MixcloudIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
message = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
||||
webpage, 'error message', default=None)
|
||||
|
||||
preview_url = self._search_regex(
|
||||
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
||||
r'\s(?:data-preview-url|m-preview)="([^"]+)"',
|
||||
webpage, 'preview url', default=None if message else NO_DEFAULT)
|
||||
|
||||
if message:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
|
||||
song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
|
||||
song_url = song_url.replace('/previews/', '/c/originals/')
|
||||
if not self._check_url(song_url, track_id, 'mp3'):
|
||||
|
||||
@@ -89,6 +89,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'timestamp': 1431878400,
|
||||
'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'No lyrics translation.',
|
||||
'url': 'http://music.163.com/#/song?id=29822014',
|
||||
@@ -101,6 +102,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'timestamp': 1419523200,
|
||||
'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'No lyrics.',
|
||||
'url': 'http://music.163.com/song?id=17241424',
|
||||
@@ -112,6 +114,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'upload_date': '20080211',
|
||||
'timestamp': 1202745600,
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'Has translated name.',
|
||||
'url': 'http://music.163.com/#/song?id=22735043',
|
||||
@@ -124,7 +127,8 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'upload_date': '20100127',
|
||||
'timestamp': 1264608000,
|
||||
'alt_title': '说出愿望吧(Genie)',
|
||||
}
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}]
|
||||
|
||||
def _process_lyrics(self, lyrics_info):
|
||||
@@ -192,6 +196,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||
'title': 'B\'day',
|
||||
},
|
||||
'playlist_count': 23,
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -223,6 +228,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||
'title': '张惠妹 - aMEI;阿密特',
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'Singer has translated name.',
|
||||
'url': 'http://music.163.com/#/artist?id=124098',
|
||||
@@ -231,6 +237,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||
'title': '李昇基 - 이승기',
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -266,6 +273,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
|
||||
},
|
||||
'playlist_count': 99,
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'Toplist/Charts sample',
|
||||
'url': 'http://music.163.com/#/discover/toplist?id=3733003',
|
||||
@@ -275,6 +283,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -314,6 +323,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
'creator': '白雅言',
|
||||
'upload_date': '20150520',
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -357,6 +367,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'upload_date': '20150613',
|
||||
'duration': 900,
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'This program has accompanying songs.',
|
||||
'url': 'http://music.163.com/#/program?id=10141022',
|
||||
@@ -366,6 +377,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'This program has accompanying songs.',
|
||||
'url': 'http://music.163.com/#/program?id=10141022',
|
||||
@@ -379,7 +391,8 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True
|
||||
}
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -438,6 +451,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
|
||||
'description': 'md5:766220985cbd16fdd552f64c578a6b15'
|
||||
},
|
||||
'playlist_mincount': 40,
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}
|
||||
_PAGE_SIZE = 1000
|
||||
|
||||
|
||||
@@ -16,7 +16,14 @@ class NovaMovIE(InfoExtractor):
|
||||
IE_NAME = 'novamov'
|
||||
IE_DESC = 'NovaMov'
|
||||
|
||||
_VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video|mobile/#/videos)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
|
||||
_VALID_URL_TEMPLATE = r'''(?x)
|
||||
http://
|
||||
(?:
|
||||
(?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/|
|
||||
(?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv=
|
||||
)
|
||||
(?P<id>[a-z\d]{13})
|
||||
'''
|
||||
_VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
|
||||
|
||||
_HOST = 'www.novamov.com'
|
||||
@@ -189,7 +196,7 @@ class AuroraVidIE(NovaMovIE):
|
||||
|
||||
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!<'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.auroravid.to/video/4rurhn9x446jj',
|
||||
'md5': '7205f346a52bbeba427603ba10d4b935',
|
||||
'info_dict': {
|
||||
@@ -199,4 +206,7 @@ class AuroraVidIE(NovaMovIE):
|
||||
'description': 'search engine optimization is used to rank the web page in the google search engine'
|
||||
},
|
||||
'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
74
youtube_dl/extractor/presstv.py
Normal file
74
youtube_dl/extractor/presstv.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_start
|
||||
|
||||
|
||||
class PressTVIE(InfoExtractor):
    """Information extractor for video articles on presstv.ir.

    Article URLs carry the publication date (year/month/day), a numeric
    article id and an optional human-readable slug, e.g.
    ``/Detail/2016/04/09/459911/Some-slug/``.
    """

    # The slug is optional *together with* the slash that precedes it, so a
    # URL that ends right after the numeric id (with or without a trailing
    # slash) is accepted too; _real_extract falls back to the id in that case.
    _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)(?:/(?P<display_id>[^/]+))?'

    _TEST = {
        'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/',
        'md5': '5d7e3195a447cb13e9267e931d8dd5a5',
        'info_dict': {
            'id': '459911',
            'display_id': 'Australian-sewerage-treatment-facility-',
            'ext': 'mp4',
            'title': 'Organic mattresses used to clean waste water',
            'upload_date': '20160409',
            # raw string: same pattern, without relying on the invalid
            # '\.' escape being passed through unchanged
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 'md5:20002e654bbafb6908395a5c0cfcd125'
        }
    }

    def _real_extract(self, url):
        """Download the article page and build the info dict for its video.

        The id, slug and upload date come from the URL itself; the playback
        path, title, thumbnail and description come from the web page.
        Raises KeyError if the page has no 'inpPlayback' hidden input.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # the slug is optional in the URL, so fall back to the numeric id
        display_id = mobj.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)

        # extract video URL from webpage
        video_url = self._hidden_inputs(webpage)['inpPlayback']

        # build list of available formats
        # specified in http://www.presstv.ir/Scripts/playback.js
        base_url = 'http://192.99.219.222:82/presstv'
        _formats = [
            (180, '_low200.mp4'),
            (360, '_low400.mp4'),
            (720, '_low800.mp4'),
            (1080, '.mp4')
        ]

        # video_url[:-4] drops the last four characters of the playback path
        # (presumably its '.mp4' extension - confirm against playback.js)
        # before the per-quality suffix is appended.
        formats = [{
            'url': base_url + video_url[:-4] + extension,
            'format_id': '%dp' % height,
            'height': height,
        } for height, extension in _formats]

        # extract video metadata
        # NOTE(review): remove_start presumably strips the 'PressTV-' prefix
        # from the <meta name="title"> value when present.
        title = remove_start(
            self._html_search_meta('title', webpage, fatal=True), 'PressTV-')

        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        # YYYYMMDD assembled from the date components embedded in the URL
        upload_date = '%04d%02d%02d' % (
            int(mobj.group('y')),
            int(mobj.group('m')),
            int(mobj.group('d')),
        )

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'description': description
        }
|
||||
@@ -1,11 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
|
||||
|
||||
# It just uses the same method as videodetective.com,
|
||||
# the internetvideoarchive.com is extracted from the og:video property
|
||||
class RottenTomatoesIE(VideoDetectiveIE):
|
||||
class RottenTomatoesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
@@ -13,7 +13,19 @@ class RottenTomatoesIE(VideoDetectiveIE):
|
||||
'info_dict': {
|
||||
'id': '613340',
|
||||
'ext': 'mp4',
|
||||
'title': 'TOY STORY 3',
|
||||
'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
|
||||
'title': 'Toy Story 3',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    # Fetch the trailer page; the query string of its og:video URL is
    # what InternetVideoArchiveIE needs to build its XML URL.
    trailer_id = self._match_id(url)
    page = self._download_webpage(url, trailer_id)

    iva_query = compat_urlparse.urlparse(
        self._og_search_video_url(page)).query

    iva_url = InternetVideoArchiveIE._build_xml_url(iva_query)
    iva_key = InternetVideoArchiveIE.ie_key()
    page_title = self._og_search_title(page)

    # Hand the URL over to InternetVideoArchiveIE (selected via ie_key),
    # supplying the title taken from this page's og:title.
    return {
        '_type': 'url_transparent',
        'url': iva_url,
        'ie_key': iva_key,
        'title': page_title,
    }
|
||||
|
||||
@@ -1,15 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
)
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class ScreencastOMaticIE(InfoExtractor):
|
||||
class ScreencastOMaticIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
|
||||
@@ -20,6 +16,7 @@ class ScreencastOMaticIE(InfoExtractor):
|
||||
'title': 'Welcome to 3-4 Philosophy @ DECV!',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
|
||||
'duration': 369.163,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,23 +24,14 @@ class ScreencastOMaticIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
setup_js = self._search_regex(
|
||||
r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);",
|
||||
webpage, 'setup code')
|
||||
data = self._parse_json(setup_js, video_id, transform_source=js_to_json)
|
||||
try:
|
||||
video_data = next(
|
||||
m for m in data['modes'] if m.get('type') == 'html5')
|
||||
except StopIteration:
|
||||
raise ExtractorError('Could not find any video entries!')
|
||||
video_url = compat_urlparse.urljoin(url, video_data['config']['file'])
|
||||
thumbnail = data.get('image')
|
||||
jwplayer_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
|
||||
info_dict.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
})
|
||||
return info_dict
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TeleBruxellesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
|
||||
'md5': '59439e568c9ee42fb77588b2096b214f',
|
||||
@@ -39,18 +41,18 @@ class TeleBruxellesIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
article_id = self._html_search_regex(
|
||||
r"<article id=\"post-(\d+)\"", webpage, 'article ID')
|
||||
r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
|
||||
description = self._og_search_description(webpage)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
|
||||
rtmp_url = self._html_search_regex(
|
||||
r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
|
||||
r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
|
||||
webpage, 'RTMP url')
|
||||
rtmp_url = rtmp_url.replace("\" + \"", "")
|
||||
rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
|
||||
|
||||
return {
|
||||
'id': article_id,
|
||||
'id': article_id or display_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
|
||||
@@ -76,7 +76,11 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
cfg_url = self._proto_relative_url(self._html_search_regex(
|
||||
self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
|
||||
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
|
||||
|
||||
if not cfg_url:
|
||||
inputs = self._hidden_inputs(webpage)
|
||||
cfg_url = 'https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s' % (inputs['vkey'], inputs['nkey'])
|
||||
|
||||
cfg_xml = self._download_xml(
|
||||
cfg_url, display_id, 'Downloading metadata',
|
||||
@@ -132,7 +136,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating'))
|
||||
|
||||
categories_str = extract_field(self._CATEGORIES_REGEX, 'categories')
|
||||
categories = categories_str.split(', ') if categories_str is not None else []
|
||||
categories = [c.strip() for c in categories_str.split(',')] if categories_str is not None else []
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -186,13 +190,14 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
|
||||
|
||||
_TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
|
||||
_DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
|
||||
_UPLOADER_REGEX = r'(?s)<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)<div'
|
||||
_DESCRIPTION_REGEX = r'<meta[^>]+name="description"[^>]+content="([^"]+)"'
|
||||
_UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h1>(.+?)</h1>'
|
||||
_CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
|
||||
|
||||
_TESTS = [{
|
||||
# anonymous uploader, no categories
|
||||
'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
|
||||
'md5': 'ecf3498417d09216374fc5907f9c6ec0',
|
||||
'md5': '7e569419fe6d69543d01e6be22f5f7c4',
|
||||
'info_dict': {
|
||||
'id': '553878',
|
||||
'display_id': 'Carmella-Decesare-striptease',
|
||||
@@ -201,17 +206,16 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'duration': 91,
|
||||
'age_limit': 18,
|
||||
'uploader': 'Anonymous',
|
||||
'categories': [],
|
||||
'categories': ['Porn Stars'],
|
||||
}
|
||||
}, {
|
||||
# non-anonymous uploader, categories
|
||||
'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
|
||||
'md5': '0f5d4d490dbfd117b8607054248a07c0',
|
||||
'md5': 'fcba2636572895aba116171a899a5658',
|
||||
'info_dict': {
|
||||
'id': '6538',
|
||||
'display_id': 'Educational-xxx-video',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Educational xxx video',
|
||||
'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
|
||||
@@ -14,8 +14,11 @@ class VideoDetectiveIE(InfoExtractor):
|
||||
'id': '194487',
|
||||
'ext': 'mp4',
|
||||
'title': 'KICK-ASS 2',
|
||||
'description': 'md5:65ba37ad619165afac7d432eaded6013',
|
||||
'duration': 138,
|
||||
'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -24,4 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
og_video = self._og_search_video_url(webpage)
|
||||
query = compat_urlparse.urlparse(og_video).query
|
||||
return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())
|
||||
return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key())
|
||||
|
||||
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VRTIE(InfoExtractor):
|
||||
@@ -52,6 +55,11 @@ class VRTIE(InfoExtractor):
|
||||
'duration': 661,
|
||||
}
|
||||
},
|
||||
{
|
||||
# YouTube video
|
||||
'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
|
||||
'only_matching': True,
|
||||
@@ -66,7 +74,17 @@ class VRTIE(InfoExtractor):
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False)
|
||||
|
||||
src = self._search_regex(
|
||||
r'data-video-src="([^"]+)"', webpage, 'video src', default=None)
|
||||
|
||||
video_type = self._search_regex(
|
||||
r'data-video-type="([^"]+)"', webpage, 'video type', default=None)
|
||||
|
||||
if video_type == 'YouTubeVideo':
|
||||
return self.url_result(src, 'Youtube')
|
||||
|
||||
formats = []
|
||||
|
||||
mobj = re.search(
|
||||
r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
|
||||
webpage)
|
||||
@@ -74,11 +92,15 @@ class VRTIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
'%s/%s' % (mobj.group('server'), mobj.group('path')),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
mobj = re.search(r'data-video-src="(?P<src>[^"]+)"', webpage)
|
||||
if mobj:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
'%s/manifest.f4m' % mobj.group('src'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
|
||||
if src:
|
||||
if determine_ext(src) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
'%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))
|
||||
|
||||
if not formats and 'data-video-geoblocking="true"' in webpage:
|
||||
self.raise_geo_restricted('This video is only available in Belgium')
|
||||
|
||||
@@ -39,9 +39,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
'info_dict': {
|
||||
'id': '4878838',
|
||||
'ext': 'mp3',
|
||||
'title': 'Carlo Ambrosio - Gypsy Eyes 1',
|
||||
'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1',
|
||||
'filesize': 4628061,
|
||||
'duration': 193.04,
|
||||
'track': 'Gypsy Eyes 1',
|
||||
'album': 'Gypsy Soul',
|
||||
'album_artist': 'Carlo Ambrosio',
|
||||
'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
|
||||
'release_year': '2009',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,16 +69,45 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
thumbnail = cover_uri.replace('%%', 'orig')
|
||||
if not thumbnail.startswith('http'):
|
||||
thumbnail = 'http://' + thumbnail
|
||||
return {
|
||||
|
||||
track_title = track['title']
|
||||
track_info = {
|
||||
'id': track['id'],
|
||||
'ext': 'mp3',
|
||||
'url': self._get_track_url(track['storageDir'], track['id']),
|
||||
'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
|
||||
'filesize': int_or_none(track.get('fileSize')),
|
||||
'duration': float_or_none(track.get('durationMs'), 1000),
|
||||
'thumbnail': thumbnail,
|
||||
'track': track_title,
|
||||
}
|
||||
|
||||
def extract_artist(artist_list):
    """Return the artists' names joined by ', ', or None if there are none.

    ``artist_list`` is expected to be a list of dicts with a 'name' key.
    Anything that is not a list yields None.  Entries that are not dicts,
    or whose 'name' is missing or empty, are skipped instead of raising,
    matching the defensive isinstance checks applied to the album data.
    """
    if not isinstance(artist_list, list):
        return None
    artists_names = [
        artist['name'] for artist in artist_list
        if isinstance(artist, dict) and artist.get('name')]
    # An empty result maps to None (not '') so callers can test truthiness
    # and omit the field entirely.
    return ', '.join(artists_names) if artists_names else None
|
||||
|
||||
albums = track.get('albums')
|
||||
if albums and isinstance(albums, list):
|
||||
album = albums[0]
|
||||
if isinstance(album, dict):
|
||||
year = album.get('year')
|
||||
track_info.update({
|
||||
'album': album.get('title'),
|
||||
'album_artist': extract_artist(album.get('artists')),
|
||||
'release_year': compat_str(year) if year else None,
|
||||
})
|
||||
|
||||
track_artist = extract_artist(track.get('artists'))
|
||||
if track_artist:
|
||||
track_info.update({
|
||||
'artist': track_artist,
|
||||
'title': '%s - %s' % (track_artist, track_title),
|
||||
})
|
||||
else:
|
||||
track_info['title'] = track_title
|
||||
return track_info
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
album_id, track_id = mobj.group('album_id'), mobj.group('id')
|
||||
|
||||
@@ -1884,7 +1884,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
if video:
|
||||
return video
|
||||
|
||||
if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
|
||||
if playlist_id.startswith(('RD', 'UL', 'PU')):
|
||||
# Mixes require a custom extraction process
|
||||
return self._extract_mix(playlist_id)
|
||||
|
||||
|
||||
@@ -1792,6 +1792,8 @@ def urlencode_postdata(*args, **kargs):
|
||||
|
||||
|
||||
def update_url_query(url, query):
|
||||
if not query:
|
||||
return url
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
qs = compat_parse_qs(parsed_url.query)
|
||||
qs.update(query)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.04.06'
|
||||
__version__ = '2016.04.13'
|
||||
|
||||
Reference in New Issue
Block a user