mirror of
https://source.netsyms.com/Mirrors/youtube-dl
synced 2026-04-22 16:07:38 +00:00
Compare commits
115 Commits
2013.07.08
...
2013.07.24
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
870a7e6156 | ||
|
|
239e3e0cca | ||
|
|
fc492de31d | ||
|
|
a9c0f9bc63 | ||
|
|
b7cc9f5026 | ||
|
|
252580c561 | ||
|
|
acc47c1a3f | ||
|
|
70fa830e4d | ||
|
|
a7af0ebaf5 | ||
|
|
67ae7b4760 | ||
|
|
de48addae2 | ||
|
|
ddbfd0f0c5 | ||
|
|
d7ae0639b4 | ||
|
|
0382435990 | ||
|
|
b390d85d95 | ||
|
|
be925dc64c | ||
|
|
de7a91bfe3 | ||
|
|
a4358cbabd | ||
|
|
177ed935a9 | ||
|
|
c364f15ff1 | ||
|
|
e1f6e61e6a | ||
|
|
0932300e3a | ||
|
|
3f40217704 | ||
|
|
f631c3311a | ||
|
|
ad433bb372 | ||
|
|
3e0b3a1428 | ||
|
|
444b116597 | ||
|
|
2aea08eda1 | ||
|
|
8e5e059d7d | ||
|
|
2b1b511f6b | ||
|
|
233ad24ecf | ||
|
|
c4949c50f9 | ||
|
|
b6ef402905 | ||
|
|
ccf365475a | ||
|
|
e1fb245690 | ||
|
|
5a76c6517e | ||
|
|
1bb9568776 | ||
|
|
ecd1c2f7e9 | ||
|
|
466de68801 | ||
|
|
88d4111cfa | ||
|
|
51fb64bab1 | ||
|
|
be547e1d3b | ||
|
|
bf85454116 | ||
|
|
5910724b11 | ||
|
|
7e24b09da9 | ||
|
|
f085f960e7 | ||
|
|
f38de77f6e | ||
|
|
58e7d46d1b | ||
|
|
2a5201638d | ||
|
|
fe6fad1242 | ||
|
|
ec00e1d8a0 | ||
|
|
de29c4144e | ||
|
|
f3bab0044e | ||
|
|
ffd1833b87 | ||
|
|
896d5b63e8 | ||
|
|
67de24e449 | ||
|
|
66400c470c | ||
|
|
7665010267 | ||
|
|
5d9b75051a | ||
|
|
ab2f744b90 | ||
|
|
300fcad8a6 | ||
|
|
f7e025958a | ||
|
|
0ab5531363 | ||
|
|
b4444d5ca2 | ||
|
|
b9d3e1635f | ||
|
|
aa6b734e02 | ||
|
|
73b57f0ccb | ||
|
|
3c4e6d8337 | ||
|
|
36034aecc2 | ||
|
|
ffca4b5c32 | ||
|
|
b0e72bcf34 | ||
|
|
7fd930c0c8 | ||
|
|
2e78b2bead | ||
|
|
44dbe89035 | ||
|
|
2d5a8b5512 | ||
|
|
159736c1b8 | ||
|
|
46720279c2 | ||
|
|
d8269e1dfb | ||
|
|
cbdbb76665 | ||
|
|
6543f0dca5 | ||
|
|
232eb88bfe | ||
|
|
a95967f8b7 | ||
|
|
2ef648d3d3 | ||
|
|
33f6830fd5 | ||
|
|
606d7e67fd | ||
|
|
fd87ff26b9 | ||
|
|
85347e1cb6 | ||
|
|
41897817cc | ||
|
|
45ff2d51d0 | ||
|
|
5de3ece225 | ||
|
|
df50a41289 | ||
|
|
59ae56fad5 | ||
|
|
690e872c51 | ||
|
|
81082e046e | ||
|
|
3fa9550837 | ||
|
|
b1082f01a6 | ||
|
|
f35b84c807 | ||
|
|
117adb0f0f | ||
|
|
abb285fb1b | ||
|
|
a431154706 | ||
|
|
cfe50f04ed | ||
|
|
a7055eb956 | ||
|
|
0a1be1e997 | ||
|
|
c93898dae9 | ||
|
|
ebdf2af727 | ||
|
|
c108eb73cc | ||
|
|
3a1375dacf | ||
|
|
41bece30b4 | ||
|
|
16ea58cbda | ||
|
|
99e350d902 | ||
|
|
13e06d298c | ||
|
|
81f0259b9e | ||
|
|
fefcb5d314 | ||
|
|
345b0c9b46 | ||
|
|
20c3893f0e |
@@ -9,6 +9,7 @@ notifications:
|
|||||||
- filippo.valsorda@gmail.com
|
- filippo.valsorda@gmail.com
|
||||||
- phihag@phihag.de
|
- phihag@phihag.de
|
||||||
- jaime.marquinez.ferrandiz+travis@gmail.com
|
- jaime.marquinez.ferrandiz+travis@gmail.com
|
||||||
|
- yasoob.khld@gmail.com
|
||||||
# irc:
|
# irc:
|
||||||
# channels:
|
# channels:
|
||||||
# - "irc.freenode.org#youtube-dl"
|
# - "irc.freenode.org#youtube-dl"
|
||||||
|
|||||||
@@ -16,7 +16,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
# OPTIONS
|
# OPTIONS
|
||||||
-h, --help print this help text and exit
|
-h, --help print this help text and exit
|
||||||
--version print program version and exit
|
--version print program version and exit
|
||||||
-U, --update update this program to latest version
|
-U, --update update this program to latest version. Make sure
|
||||||
|
that you have sufficient permissions (run with
|
||||||
|
sudo if needed)
|
||||||
-i, --ignore-errors continue on download errors
|
-i, --ignore-errors continue on download errors
|
||||||
--dump-user-agent display the current browser identification
|
--dump-user-agent display the current browser identification
|
||||||
--user-agent UA specify a custom user agent
|
--user-agent UA specify a custom user agent
|
||||||
|
|||||||
@@ -5,27 +5,36 @@
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
tests = [
|
tests = [
|
||||||
|
# 92 - vflQw-fB4 2013/07/17
|
||||||
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
|
||||||
|
"mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
|
||||||
|
# 90
|
||||||
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
|
||||||
|
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
|
||||||
# 88
|
# 88
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
|
||||||
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
|
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
|
||||||
# 87
|
# 87
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
||||||
"!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
|
"tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
|
||||||
# 86 - vfl_ymO4Z 2013/06/27
|
# 86 - vfl_ymO4Z 2013/06/27
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||||
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
||||||
# 85
|
# 85 - vflSAFCP9 2013/07/19
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||||
"{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
|
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
|
||||||
# 84
|
# 84
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
|
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
|
||||||
# 83
|
# 83 - vflcaqGO8 2013/07/11
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||||
"D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"),
|
"urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
|
||||||
# 82
|
# 82
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
|
||||||
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
|
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
|
||||||
|
# 81
|
||||||
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
|
||||||
|
"urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."),
|
||||||
]
|
]
|
||||||
|
|
||||||
def find_matching(wrong, right):
|
def find_matching(wrong, right):
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
@@ -16,6 +17,7 @@ from youtube_dl.utils import unescapeHTML
|
|||||||
from youtube_dl.utils import orderedSet
|
from youtube_dl.utils import orderedSet
|
||||||
from youtube_dl.utils import DateRange
|
from youtube_dl.utils import DateRange
|
||||||
from youtube_dl.utils import unified_strdate
|
from youtube_dl.utils import unified_strdate
|
||||||
|
from youtube_dl.utils import find_xpath_attr
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
_compat_str = lambda b: b.decode('unicode-escape')
|
_compat_str = lambda b: b.decode('unicode-escape')
|
||||||
@@ -112,5 +114,18 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||||
|
|
||||||
|
def test_find_xpath_attr(self):
|
||||||
|
testxml = u'''<root>
|
||||||
|
<node/>
|
||||||
|
<node x="a"/>
|
||||||
|
<node x="a" y="c" />
|
||||||
|
<node x="b" y="d" />
|
||||||
|
</root>'''
|
||||||
|
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||||
|
|
||||||
|
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
|
||||||
|
self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
|
||||||
|
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
22
test/test_youtube_sig.py
Executable file → Normal file
22
test/test_youtube_sig.py
Executable file → Normal file
@@ -13,9 +13,14 @@ from helper import FakeYDL
|
|||||||
sig = YoutubeIE(FakeYDL())._decrypt_signature
|
sig = YoutubeIE(FakeYDL())._decrypt_signature
|
||||||
|
|
||||||
class TestYoutubeSig(unittest.TestCase):
|
class TestYoutubeSig(unittest.TestCase):
|
||||||
def test_43_43(self):
|
def test_92(self):
|
||||||
wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135'
|
wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
|
||||||
right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE'
|
right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
|
||||||
|
self.assertEqual(sig(wrong), right)
|
||||||
|
|
||||||
|
def test_90(self):
|
||||||
|
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
|
||||||
|
right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
|
||||||
self.assertEqual(sig(wrong), right)
|
self.assertEqual(sig(wrong), right)
|
||||||
|
|
||||||
def test_88(self):
|
def test_88(self):
|
||||||
@@ -25,7 +30,7 @@ class TestYoutubeSig(unittest.TestCase):
|
|||||||
|
|
||||||
def test_87(self):
|
def test_87(self):
|
||||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
|
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
|
||||||
right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
|
right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"
|
||||||
self.assertEqual(sig(wrong), right)
|
self.assertEqual(sig(wrong), right)
|
||||||
|
|
||||||
def test_86(self):
|
def test_86(self):
|
||||||
@@ -35,7 +40,7 @@ class TestYoutubeSig(unittest.TestCase):
|
|||||||
|
|
||||||
def test_85(self):
|
def test_85(self):
|
||||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
|
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
|
||||||
right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
|
right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
|
||||||
self.assertEqual(sig(wrong), right)
|
self.assertEqual(sig(wrong), right)
|
||||||
|
|
||||||
def test_84(self):
|
def test_84(self):
|
||||||
@@ -45,7 +50,7 @@ class TestYoutubeSig(unittest.TestCase):
|
|||||||
|
|
||||||
def test_83(self):
|
def test_83(self):
|
||||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
||||||
right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"
|
right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
||||||
self.assertEqual(sig(wrong), right)
|
self.assertEqual(sig(wrong), right)
|
||||||
|
|
||||||
def test_82(self):
|
def test_82(self):
|
||||||
@@ -53,5 +58,10 @@ class TestYoutubeSig(unittest.TestCase):
|
|||||||
right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
|
right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
|
||||||
self.assertEqual(sig(wrong), right)
|
self.assertEqual(sig(wrong), right)
|
||||||
|
|
||||||
|
def test_81(self):
|
||||||
|
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
|
||||||
|
right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
|
||||||
|
self.assertEqual(sig(wrong), right)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -348,6 +348,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
||||||
if result_type == 'video':
|
if result_type == 'video':
|
||||||
|
ie_result.update(extra_info)
|
||||||
if 'playlist' not in ie_result:
|
if 'playlist' not in ie_result:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
ie_result['playlist'] = None
|
ie_result['playlist'] = None
|
||||||
@@ -528,10 +529,8 @@ class YoutubeDL(object):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writethumbnail', False):
|
if self.params.get('writethumbnail', False):
|
||||||
if 'thumbnail' in info_dict:
|
if info_dict.get('thumbnail') is not None:
|
||||||
thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
|
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
|
||||||
if not thumb_format:
|
|
||||||
thumb_format = 'jpg'
|
|
||||||
thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
|
thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
|
||||||
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
|
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
|
||||||
(info_dict['extractor'], info_dict['id']))
|
(info_dict['extractor'], info_dict['id']))
|
||||||
|
|||||||
@@ -129,7 +129,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option('-v', '--version',
|
general.add_option('-v', '--version',
|
||||||
action='version', help='print program version and exit')
|
action='version', help='print program version and exit')
|
||||||
general.add_option('-U', '--update',
|
general.add_option('-U', '--update',
|
||||||
action='store_true', dest='update_self', help='update this program to latest version')
|
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
|
||||||
general.add_option('-i', '--ignore-errors',
|
general.add_option('-i', '--ignore-errors',
|
||||||
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
|
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
|
||||||
general.add_option('--dump-user-agent',
|
general.add_option('--dump-user-agent',
|
||||||
@@ -580,7 +580,7 @@ def _real_main(argv=None):
|
|||||||
})
|
})
|
||||||
|
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
ydl.to_screen(u'[debug] youtube-dl version ' + __version__)
|
sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||||
try:
|
try:
|
||||||
sp = subprocess.Popen(
|
sp = subprocess.Popen(
|
||||||
['git', 'rev-parse', '--short', 'HEAD'],
|
['git', 'rev-parse', '--short', 'HEAD'],
|
||||||
@@ -589,11 +589,14 @@ def _real_main(argv=None):
|
|||||||
out, err = sp.communicate()
|
out, err = sp.communicate()
|
||||||
out = out.decode().strip()
|
out = out.decode().strip()
|
||||||
if re.match('[0-9a-f]+', out):
|
if re.match('[0-9a-f]+', out):
|
||||||
ydl.to_screen(u'[debug] Git HEAD: ' + out)
|
sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n')
|
||||||
except:
|
except:
|
||||||
sys.exc_clear()
|
try:
|
||||||
ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()))
|
sys.exc_clear()
|
||||||
ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
|
except:
|
||||||
|
pass
|
||||||
|
sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
|
||||||
|
sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
|
||||||
|
|
||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
from .archiveorg import ArchiveOrgIE
|
from .archiveorg import ArchiveOrgIE
|
||||||
from .ard import ARDIE
|
from .ard import ARDIE
|
||||||
from .arte import ArteTvIE
|
from .arte import ArteTvIE
|
||||||
@@ -7,16 +6,23 @@ from .bandcamp import BandcampIE
|
|||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
|
from .canalplus import CanalplusIE
|
||||||
from .collegehumor import CollegeHumorIE
|
from .collegehumor import CollegeHumorIE
|
||||||
from .comedycentral import ComedyCentralIE
|
from .comedycentral import ComedyCentralIE
|
||||||
|
from .condenast import CondeNastIE
|
||||||
|
from .criterion import CriterionIE
|
||||||
from .cspan import CSpanIE
|
from .cspan import CSpanIE
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
from .depositfiles import DepositFilesIE
|
from .depositfiles import DepositFilesIE
|
||||||
|
from .dotsub import DotsubIE
|
||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
|
from .exfm import ExfmIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
|
from .freesound import FreesoundIE
|
||||||
from .funnyordie import FunnyOrDieIE
|
from .funnyordie import FunnyOrDieIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gametrailers import GametrailersIE
|
from .gametrailers import GametrailersIE
|
||||||
@@ -26,6 +32,7 @@ from .googlesearch import GoogleSearchIE
|
|||||||
from .hotnewhiphop import HotNewHipHopIE
|
from .hotnewhiphop import HotNewHipHopIE
|
||||||
from .howcast import HowcastIE
|
from .howcast import HowcastIE
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
|
from .ign import IGNIE, OneUPIE
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
@@ -33,6 +40,7 @@ from .jukebox import JukeboxIE
|
|||||||
from .justintv import JustinTVIE
|
from .justintv import JustinTVIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
|
from .livestream import LivestreamIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
from .mtv import MTVIE
|
from .mtv import MTVIE
|
||||||
@@ -44,6 +52,7 @@ from .pornotube import PornotubeIE
|
|||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .ringtv import RingTVIE
|
from .ringtv import RingTVIE
|
||||||
|
from .sina import SinaIE
|
||||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
||||||
from .spiegel import SpiegelIE
|
from .spiegel import SpiegelIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
@@ -52,16 +61,20 @@ from .steam import SteamIE
|
|||||||
from .teamcoco import TeamcocoIE
|
from .teamcoco import TeamcocoIE
|
||||||
from .ted import TEDIE
|
from .ted import TEDIE
|
||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
|
from .thisav import ThisAVIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .tudou import TudouIE
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
|
from .veoh import VeohIE
|
||||||
from .vevo import VevoIE
|
from .vevo import VevoIE
|
||||||
from .vimeo import VimeoIE
|
from .vimeo import VimeoIE
|
||||||
from .vine import VineIE
|
from .vine import VineIE
|
||||||
|
from .c56 import C56IE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
from .worldstarhiphop import WorldStarHipHopIE
|
from .worldstarhiphop import WorldStarHipHopIE
|
||||||
from .xhamster import XHamsterIE
|
from .xhamster import XHamsterIE
|
||||||
@@ -79,6 +92,7 @@ from .youtube import (
|
|||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
YoutubeShowIE,
|
||||||
YoutubeSubscriptionsIE,
|
YoutubeSubscriptionsIE,
|
||||||
|
YoutubeRecommendedIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||||||
formats.sort(key=lambda fdata: fdata['file_size'])
|
formats.sort(key=lambda fdata: fdata['file_size'])
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@@ -63,4 +64,4 @@ class ArchiveOrgIE(InfoExtractor):
|
|||||||
info['url'] = formats[-1]['url']
|
info['url'] = formats[-1]['url']
|
||||||
info['ext'] = determine_ext(formats[-1]['url'])
|
info['ext'] = determine_ext(formats[-1]['url'])
|
||||||
|
|
||||||
return self.video_result(info)
|
return info
|
||||||
@@ -32,7 +32,7 @@ class ARDIE(InfoExtractor):
|
|||||||
# determine title and media streams from webpage
|
# determine title and media streams from webpage
|
||||||
html = self._download_webpage(url, video_id)
|
html = self._download_webpage(url, video_id)
|
||||||
title = re.search(self._TITLE, html).group('title')
|
title = re.search(self._TITLE, html).group('title')
|
||||||
streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
|
streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)]
|
||||||
if not streams:
|
if not streams:
|
||||||
assert '"fsk"' in html
|
assert '"fsk"' in html
|
||||||
raise ExtractorError(u'This video is only available after 8:00 pm')
|
raise ExtractorError(u'This video is only available after 8:00 pm')
|
||||||
|
|||||||
@@ -4,10 +4,8 @@ import xml.etree.ElementTree
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
# This is used by the not implemented extractLiveStream method
|
|
||||||
compat_urllib_parse,
|
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
find_xpath_attr,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -28,6 +26,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
|
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
|
||||||
|
|
||||||
# TODO implement Live Stream
|
# TODO implement Live Stream
|
||||||
|
# from ..utils import compat_urllib_parse
|
||||||
# def extractLiveStream(self, url):
|
# def extractLiveStream(self, url):
|
||||||
# video_lang = url.split('/')[-4]
|
# video_lang = url.split('/')[-4]
|
||||||
# info = self.grep_webpage(
|
# info = self.grep_webpage(
|
||||||
@@ -57,7 +56,6 @@ class ArteTvIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._EMISSION_URL, url)
|
mobj = re.match(self._EMISSION_URL, url)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
name = mobj.group('name')
|
|
||||||
lang = mobj.group('lang')
|
lang = mobj.group('lang')
|
||||||
# This is not a real id, it can be for example AJT for the news
|
# This is not a real id, it can be for example AJT for the news
|
||||||
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
||||||
@@ -122,7 +120,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||||
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
|
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||||
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
|
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
|
||||||
config_node = ref_xml_doc.find('.//video[@lang="%s"]' % lang)
|
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||||
config_xml_url = config_node.attrib['ref']
|
config_xml_url = config_node.attrib['ref']
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||||
|
|
||||||
|
|||||||
@@ -189,5 +189,5 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
pagenum += 1
|
pagenum += 1
|
||||||
|
|
||||||
urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
|
urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
|
||||||
url_entries = [self.url_result(url, 'BlipTV') for url in urls]
|
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
||||||
return [self.playlist_result(url_entries, playlist_title = username)]
|
return [self.playlist_result(url_entries, playlist_title = username)]
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
|
||||||
class BreakIE(InfoExtractor):
|
class BreakIE(InfoExtractor):
|
||||||
@@ -17,17 +19,20 @@ class BreakIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1).split("-")[-1]
|
video_id = mobj.group(1).split("-")[-1]
|
||||||
webpage = self._download_webpage(url, video_id)
|
embed_url = 'http://www.break.com/embed/%s' % video_id
|
||||||
video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
key = re.search(r"icon: '(.+?)',",webpage).group(1)
|
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
|
||||||
final_url = str(video_url)+"?"+str(key)
|
u'info json', flags=re.DOTALL)
|
||||||
thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1)
|
info = json.loads(info_json)
|
||||||
title = re.search(r"sVidTitle: '(.+)',",webpage).group(1)
|
video_url = info['videoUri']
|
||||||
ext = video_url.split('.')[-1]
|
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
||||||
|
if m_youtube is not None:
|
||||||
|
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||||
|
final_url = video_url + '?' + info['AuthToken']
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': final_url,
|
'url': final_url,
|
||||||
'ext': ext,
|
'ext': determine_ext(final_url),
|
||||||
'title': title,
|
'title': info['contentName'],
|
||||||
'thumbnail': thumbnail_url,
|
'thumbnail': info['thumbUri'],
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -1,28 +1,82 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
find_xpath_attr,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
|
||||||
class BrightcoveIE(InfoExtractor):
|
class BrightcoveIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://.*brightcove\.com/.*\?(?P<query>.*videoPlayer=(?P<id>\d*).*)'
|
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
|
||||||
|
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
||||||
|
_PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
|
||||||
|
|
||||||
|
# There is a test for Brigtcove in GenericIE, that way we test both the download
|
||||||
|
# and the detection of videos, and we don't have to find an URL that is always valid
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _build_brighcove_url(cls, object_str):
|
||||||
|
"""
|
||||||
|
Build a Brightcove url from a xml string containing
|
||||||
|
<object class="BrightcoveExperience">{params}</object>
|
||||||
|
"""
|
||||||
|
object_doc = xml.etree.ElementTree.fromstring(object_str)
|
||||||
|
assert u'BrightcoveExperience' in object_doc.attrib['class']
|
||||||
|
params = {'flashID': object_doc.attrib['id'],
|
||||||
|
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
||||||
|
}
|
||||||
|
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
|
||||||
|
# Not all pages define this value
|
||||||
|
if playerKey is not None:
|
||||||
|
params['playerKey'] = playerKey.attrib['value']
|
||||||
|
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
|
||||||
|
if videoPlayer is not None:
|
||||||
|
params['@videoPlayer'] = videoPlayer.attrib['value']
|
||||||
|
data = compat_urllib_parse.urlencode(params)
|
||||||
|
return cls._FEDERATED_URL_TEMPLATE % data
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
query = mobj.group('query')
|
query_str = mobj.group('query')
|
||||||
video_id = mobj.group('id')
|
query = compat_urlparse.parse_qs(query_str)
|
||||||
|
|
||||||
request_url = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' % query
|
videoPlayer = query.get('@videoPlayer')
|
||||||
|
if videoPlayer:
|
||||||
|
return self._get_video_info(videoPlayer[0], query_str)
|
||||||
|
else:
|
||||||
|
player_key = query['playerKey']
|
||||||
|
return self._get_playlist_info(player_key[0])
|
||||||
|
|
||||||
|
def _get_video_info(self, video_id, query):
|
||||||
|
request_url = self._FEDERATED_URL_TEMPLATE % query
|
||||||
webpage = self._download_webpage(request_url, video_id)
|
webpage = self._download_webpage(request_url, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
|
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
|
||||||
info = json.loads(info)['data']
|
info = json.loads(info)['data']
|
||||||
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
|
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
|
||||||
|
|
||||||
|
return self._extract_video_info(video_info)
|
||||||
|
|
||||||
|
def _get_playlist_info(self, player_key):
    """
    Download the playlist attached to player_key and return it as a
    playlist result with one entry per video of the collection.
    """
    playlist_url = self._PLAYLIST_URL_TEMPLATE % player_key
    raw_json = self._download_webpage(playlist_url, player_key,
                                      u'Downloading playlist information')

    video_list = json.loads(raw_json)['videoList']
    collection = video_list['mediaCollectionDTO']
    entries = []
    for video_info in collection['videoDTOs']:
        entries.append(self._extract_video_info(video_info))

    return self.playlist_result(entries, playlist_id=video_list['id'],
                                playlist_title=collection['displayName'])
|
||||||
|
|
||||||
|
def _extract_video_info(self, video_info):
|
||||||
renditions = video_info['renditions']
|
renditions = video_info['renditions']
|
||||||
renditions = sorted(renditions, key=lambda r: r['size'])
|
renditions = sorted(renditions, key=lambda r: r['size'])
|
||||||
best_format = renditions[-1]
|
best_format = renditions[-1]
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_info['id'],
|
||||||
'title': video_info['displayName'],
|
'title': video_info['displayName'],
|
||||||
'url': best_format['defaultURL'],
|
'url': best_format['defaultURL'],
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
|||||||
36
youtube_dl/extractor/c56.py
Normal file
36
youtube_dl/extractor/c56.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
class C56IE(InfoExtractor):
    """Extractor for 56.com video pages and player urls."""

    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
    IE_NAME = u'56.com'

    _TEST = {
        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        u'file': u'93440716.mp4',
        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
        u'info_dict': {
            u'title': u'网事知多少 第32期:车怒',
        },
    }

    def _real_extract(self, url):
        """Fetch the json description of the video and return its info dict."""
        text_id = re.match(self._VALID_URL, url, flags=re.VERBOSE).group('textid')
        info_url = 'http://vxml.56.com/json/%s/' % text_id
        info_page = self._download_webpage(info_url, text_id,
                                           u'Downloading video info')
        info = json.loads(info_page)['info']

        # Order the files by size; the biggest one is taken as the best format
        by_size = sorted(info['rfiles'], key=lambda f: int(f['filesize']))
        video_url = by_size[-1]['url']

        return {
            'id': info['vid'],
            'title': info['Subject'],
            'url': video_url,
            'ext': determine_ext(video_url),
            'thumbnail': info.get('bimg') or info.get('img'),
        }
|
||||||
46
youtube_dl/extractor/canalplus.py
Normal file
46
youtube_dl/extractor/canalplus.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
class CanalplusIE(InfoExtractor):
    """Extractor for canalplus.fr pages that carry a ?vid=<id> parameter."""

    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
    IE_NAME = u'canalplus.fr'

    _TEST = {
        u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
        u'file': u'889861.flv',
        u'md5': u'590a888158b5f0d6832f84001fbf3e99',
        u'info_dict': {
            u'title': u'Le Petit Journal 20/06/13 - La guerre des drone',
            u'upload_date': u'20130620',
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Download the xml listing for the video id and build its info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        info_page = self._download_webpage(self._VIDEO_INFO_TEMPLATE % video_id,
                                           video_id,
                                           u'Downloading video info')

        self.report_extraction(video_id)
        doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
        # The document lists several videos; keep the one whose ID is ours
        matching = [entry for entry in doc if entry.find('ID').text == video_id]
        video_info = matching[0]
        infos = video_info.find('INFOS')
        media = video_info.find('MEDIA')

        # The last of these formats that is present in the document is used
        available_urls = []
        for quality in ('BAS_DEBIT', 'HAUT_DEBIT', 'HD'):
            node = media.find('VIDEOS/%s' % quality)
            if node is not None:
                available_urls.append(node.text)
        video_url = available_urls[-1]

        title = u'%s - %s' % (infos.find('TITRAGE/TITRE').text,
                              infos.find('TITRAGE/SOUS_TITRE').text)
        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'flv',
            'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
            'thumbnail': media.find('IMAGES/GRAND').text,
        }
|
||||||
@@ -1,26 +1,26 @@
|
|||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
|
||||||
compat_str,
|
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CollegeHumorIE(InfoExtractor):
|
class CollegeHumorIE(InfoExtractor):
|
||||||
_WORKING = False
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
|
|
||||||
|
|
||||||
def report_manifest(self, video_id):
|
_TEST = {
|
||||||
"""Report information extraction."""
|
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||||
self.to_screen(u'%s: Downloading XML manifest' % video_id)
|
u'file': u'6902724.mp4',
|
||||||
|
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Comic-Con Cosplay Catastrophe',
|
||||||
|
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -36,14 +36,16 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||||
try:
|
metaXml = self._download_webpage(xmlUrl, video_id,
|
||||||
metaXml = compat_urllib_request.urlopen(xmlUrl).read()
|
u'Downloading info XML',
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
u'Unable to download video info XML')
|
||||||
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
|
|
||||||
|
|
||||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
||||||
try:
|
try:
|
||||||
videoNode = mdoc.findall('./video')[0]
|
videoNode = mdoc.findall('./video')[0]
|
||||||
|
youtubeIdNode = videoNode.find('./youtubeID')
|
||||||
|
if youtubeIdNode is not None:
|
||||||
|
return self.url_result(youtubeIdNode.text, 'Youtube')
|
||||||
info['description'] = videoNode.findall('./description')[0].text
|
info['description'] = videoNode.findall('./description')[0].text
|
||||||
info['title'] = videoNode.findall('./caption')[0].text
|
info['title'] = videoNode.findall('./caption')[0].text
|
||||||
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
||||||
@@ -52,11 +54,9 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
raise ExtractorError(u'Invalid metadata XML file')
|
raise ExtractorError(u'Invalid metadata XML file')
|
||||||
|
|
||||||
manifest_url += '?hdcore=2.10.3'
|
manifest_url += '?hdcore=2.10.3'
|
||||||
self.report_manifest(video_id)
|
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||||
try:
|
u'Downloading XML manifest',
|
||||||
manifestXml = compat_urllib_request.urlopen(manifest_url).read()
|
u'Unable to download video info XML')
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
|
|
||||||
|
|
||||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||||
try:
|
try:
|
||||||
@@ -66,9 +66,8 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
except IndexError as err:
|
except IndexError as err:
|
||||||
raise ExtractorError(u'Invalid manifest file')
|
raise ExtractorError(u'Invalid manifest file')
|
||||||
|
|
||||||
url_pr = compat_urllib_parse_urlparse(manifest_url)
|
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||||
url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
|
|
||||||
|
|
||||||
info['url'] = url
|
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||||
info['ext'] = 'f4f'
|
info['ext'] = 'mp4'
|
||||||
return [info]
|
return [info]
|
||||||
|
|||||||
@@ -24,7 +24,9 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
(full-episodes/(?P<episode>.*)|
|
(full-episodes/(?P<episode>.*)|
|
||||||
(?P<clip>
|
(?P<clip>
|
||||||
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
|
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
|
||||||
|
(?P<interview>
|
||||||
|
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
|
||||||
$"""
|
$"""
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
|
u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
|
||||||
@@ -87,6 +89,9 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
epTitle = mobj.group('cntitle')
|
epTitle = mobj.group('cntitle')
|
||||||
dlNewest = False
|
dlNewest = False
|
||||||
|
elif mobj.group('interview'):
|
||||||
|
epTitle = mobj.group('interview_title')
|
||||||
|
dlNewest = False
|
||||||
else:
|
else:
|
||||||
dlNewest = not mobj.group('episode')
|
dlNewest = not mobj.group('episode')
|
||||||
if dlNewest:
|
if dlNewest:
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from ..utils import (
|
|||||||
clean_html,
|
clean_html,
|
||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
class InfoExtractor(object):
|
class InfoExtractor(object):
|
||||||
@@ -125,6 +126,11 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
|
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
|
||||||
""" Returns a tuple (page content as string, URL handle) """
|
""" Returns a tuple (page content as string, URL handle) """
|
||||||
|
|
||||||
|
# Strip hashes from the URL (#1038)
|
||||||
|
if isinstance(url_or_request, (compat_str, str)):
|
||||||
|
url_or_request = url_or_request.partition('#')[0]
|
||||||
|
|
||||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
|
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
|
||||||
content_type = urlh.headers.get('Content-Type', '')
|
content_type = urlh.headers.get('Content-Type', '')
|
||||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||||
@@ -169,11 +175,6 @@ class InfoExtractor(object):
|
|||||||
self.to_screen(u'Logging in')
|
self.to_screen(u'Logging in')
|
||||||
|
|
||||||
#Methods for following #608
|
#Methods for following #608
|
||||||
#They set the correct value of the '_type' key
|
|
||||||
def video_result(self, video_info):
|
|
||||||
"""Returns a video"""
|
|
||||||
video_info['_type'] = 'video'
|
|
||||||
return video_info
|
|
||||||
def url_result(self, url, ie=None):
|
def url_result(self, url, ie=None):
|
||||||
"""Returns a url that points to a page that should be processed"""
|
"""Returns a url that points to a page that should be processed"""
|
||||||
#TODO: ie should be the class used for getting the info
|
#TODO: ie should be the class used for getting the info
|
||||||
@@ -262,6 +263,31 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return (username, password)
|
return (username, password)
|
||||||
|
|
||||||
|
# Helper functions for extracting OpenGraph info
|
||||||
|
@staticmethod
|
||||||
|
def _og_regex(prop):
|
||||||
|
return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
|
||||||
|
|
||||||
|
def _og_search_property(self, prop, html, name=None, **kargs):
    """
    Search html for the OpenGraph property `prop` and return its content
    with the HTML entities unescaped.

    `name` is the label handed to _search_regex; it defaults to
    'OpenGraph <prop>'. Extra keyword arguments are forwarded to
    _search_regex.
    """
    label = name if name is not None else 'OpenGraph %s' % prop
    pattern = self._og_regex(prop)
    raw_value = self._search_regex(pattern, html, label, flags=re.DOTALL, **kargs)
    return unescapeHTML(raw_value)
|
||||||
|
|
||||||
|
def _og_search_thumbnail(self, html, **kargs):
    """Return the og:image property of html (searched with fatal=False)."""
    thumbnail_url = self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
    return thumbnail_url
|
||||||
|
|
||||||
|
def _og_search_description(self, html, **kargs):
    """Return the og:description property of html (searched with fatal=False)."""
    description = self._og_search_property('description', html, fatal=False, **kargs)
    return description
|
||||||
|
|
||||||
|
def _og_search_title(self, html, **kargs):
    """Return the og:title property of html."""
    title = self._og_search_property('title', html, **kargs)
    return title
|
||||||
|
|
||||||
|
def _og_search_video_url(self, html, name='video url', **kargs):
    """
    Return the video url advertised through OpenGraph.

    The og:video:secure_url pattern is listed before the og:video one in
    the list handed to _html_search_regex.
    """
    candidates = [self._og_regex(prop) for prop in ('video:secure_url', 'video')]
    return self._html_search_regex(candidates, html, name, **kargs)
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
Base class for paged search queries extractors.
|
Base class for paged search queries extractors.
|
||||||
|
|||||||
106
youtube_dl/extractor/condenast.py
Normal file
106
youtube_dl/extractor/condenast.py
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
orderedSet,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CondeNastIE(InfoExtractor):
    """
    Condé Nast is a media group, some of its sites use a custom HTML5 player
    that works the same in all of them.
    """

    # The keys are the supported sites and the values are the name to be shown
    # to the user and in the extractor description.
    _SITES = {'wired': u'WIRED',
              'gq': u'GQ',
              'vogue': u'Vogue',
              'glamour': u'Glamour',
              'wmagazine': u'W Magazine',
              'vanityfair': u'Vanity Fair',
              }

    # The dots of the host name are escaped: an unescaped '.' matches any
    # character, so urls of unrelated hosts would be accepted as well.
    _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
    IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))

    _TEST = {
        u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
        u'file': u'5171b343c2b4c00dd0c1ccb3.mp4',
        u'md5': u'1921f713ed48aabd715691f774c451f7',
        u'info_dict': {
            u'title': u'3D Printed Speakers Lit With LED',
            u'description': u'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
        }
    }

    def _extract_series(self, url, webpage):
        """
        Return a playlist result holding one url result per distinct
        /watch/ link found in the thumbnails of a series page.
        """
        title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
                                        webpage, u'series title', flags=re.DOTALL)
        url_object = compat_urllib_parse_urlparse(url)
        base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
        m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
                              webpage, flags=re.DOTALL)
        # orderedSet drops the repeated links
        paths = orderedSet(m.group(1) for m in m_paths)
        entries = [self.url_result(compat_urlparse.urljoin(base_url, path), 'CondeNast')
                   for path in paths]
        return self.playlist_result(entries, playlist_title=title)

    def _extract_video(self, webpage):
        """
        Return the info dict of the video embedded in webpage.

        The player parameters found in the page are sent to the player's
        info url, whose answer contains the json description of the video.
        """
        description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
                                               r'<div class="video-post-content">(.+?)</div>',
                                               ],
                                              webpage, u'description',
                                              fatal=False, flags=re.DOTALL)
        params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
                                    u'player params', flags=re.DOTALL)
        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id')
        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id')
        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target')
        data = compat_urllib_parse.urlencode({'videoId': video_id,
                                              'playerId': player_id,
                                              'target': target,
                                              })
        # Fall back to the default loader when the page does not name its own
        base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
                                           webpage, u'base info url',
                                           default='http://player.cnevids.com/player/loader.js?')
        info_url = base_info_url + data
        info_page = self._download_webpage(info_url, video_id,
                                           u'Downloading video info')
        video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info')
        video_info = json.loads(video_info)

        def _formats_sort_key(f):
            # Quality weighs more than the container; 'high' and mp4 sort last
            type_ord = 1 if f['type'] == 'video/mp4' else 0
            quality_ord = 1 if f['quality'] == 'high' else 0
            return (quality_ord, type_ord)
        best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1]

        return {'id': video_id,
                'url': best_format['src'],
                'ext': best_format['type'].split('/')[-1],
                'title': video_info['title'],
                'thumbnail': video_info['poster_frame'],
                'description': description,
                }

    def _real_extract(self, url):
        """Download the page and dispatch on the url type (series or video)."""
        mobj = re.match(self._VALID_URL, url)
        site = mobj.group('site')
        url_type = mobj.group('type')
        page_id = mobj.group('id')

        self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
        webpage = self._download_webpage(url, page_id)

        if url_type == 'series':
            return self._extract_series(url, webpage)
        else:
            return self._extract_video(webpage)
|
||||||
40
youtube_dl/extractor/criterion.py
Normal file
40
youtube_dl/extractor/criterion.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
class CriterionIE(InfoExtractor):
    """Extractor for the film pages of criterion.com."""

    _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+'
    _TEST = {
        u'url': u'http://www.criterion.com/films/184-le-samourai',
        u'file': u'184.mp4',
        u'md5': u'bc51beba55685509883a9a7830919ec3',
        u'info_dict': {
            u"title": u"Le Samouraï",
            u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f',
        }
    }

    def _real_extract(self, url):
        """Scrape the film page for the video url and its metadata."""
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)

        # The video and thumbnail urls are set as variables of the flash player
        final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;',
                                       webpage, 'video url')
        title = self._html_search_regex(r'<meta content="(.+?)" property="og:title" />',
                                        webpage, 'video title')
        description = self._html_search_regex(r'<meta name="description" content="(.+?)" />',
                                              webpage, 'video description')
        thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
                                       webpage, 'thumbnail url')

        result = {
            'id': video_id,
            'url': final_url,
            'title': title,
            'ext': determine_ext(final_url),
            'description': description,
            'thumbnail': thumbnail,
        }
        return result
|
||||||
@@ -34,8 +34,6 @@ class CSpanIE(InfoExtractor):
|
|||||||
description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
|
description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
|
||||||
webpage, 'description',
|
webpage, 'description',
|
||||||
flags=re.MULTILINE|re.DOTALL)
|
flags=re.MULTILINE|re.DOTALL)
|
||||||
thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.*?)"',
|
|
||||||
webpage, 'thumbnail')
|
|
||||||
|
|
||||||
url = self._search_regex(r'<string name="URL">(.*?)</string>',
|
url = self._search_regex(r'<string name="URL">(.*?)</string>',
|
||||||
video_info, 'video url')
|
video_info, 'video url')
|
||||||
@@ -49,5 +47,5 @@ class CSpanIE(InfoExtractor):
|
|||||||
'url': url,
|
'url': url,
|
||||||
'play_path': path,
|
'play_path': path,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,9 +39,6 @@ class DailymotionIE(InfoExtractor):
|
|||||||
# Extract URL, uploader and title from webpage
|
# Extract URL, uploader and title from webpage
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(.*?)" />',
|
|
||||||
webpage, 'title')
|
|
||||||
|
|
||||||
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
||||||
# Looking for official user
|
# Looking for official user
|
||||||
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
||||||
@@ -76,7 +73,7 @@ class DailymotionIE(InfoExtractor):
|
|||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': video_title,
|
'title': self._og_search_title(webpage),
|
||||||
'ext': video_extension,
|
'ext': video_extension,
|
||||||
'thumbnail': info['thumbnail_url']
|
'thumbnail': info['thumbnail_url']
|
||||||
}]
|
}]
|
||||||
|
|||||||
41
youtube_dl/extractor/dotsub.py
Normal file
41
youtube_dl/extractor/dotsub.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class DotsubIE(InfoExtractor):
    """Extractor for dotsub.com/view/<id> pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?dotsub\.com/view/([^/]+)'
    _TEST = {
        u'url': u'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
        u'file': u'aed3b8b2-1889-4df5-ae63-ad85f5572f27.flv',
        u'md5': u'0914d4d69605090f623b7ac329fea66e',
        u'info_dict': {
            u"title": u"Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary",
            u"uploader": u"4v4l0n42",
            u'description': u'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
            u'thumbnail': u'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
            u'upload_date': u'20101213',
        }
    }

    def _real_extract(self, url):
        """Read the json metadata of the video and return it as a one item list."""
        video_id = re.match(self._VALID_URL, url).group(1)
        info_url = "https://dotsub.com/api/media/%s/metadata" %(video_id)
        info = json.loads(self._download_webpage(info_url, video_id))
        # The timestamp is in milliseconds
        created = time.gmtime(info['dateCreated']/1000)

        video = {
            'id': video_id,
            'url': info['mediaURI'],
            'ext': 'flv',
            'title': info['title'],
            'thumbnail': info['screenshotURI'],
            'description': info['description'],
            'uploader': info['user'],
            'view_count': info['numberOfViews'],
            'upload_date': u'%04i%02i%02i' % (created.tm_year, created.tm_mon, created.tm_mday),
        }
        return [video]
|
||||||
@@ -6,7 +6,6 @@ import xml.etree.ElementTree
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -68,6 +67,7 @@ class DreiSatIE(InfoExtractor):
|
|||||||
formats.sort(key=_sortkey)
|
formats.sort(key=_sortkey)
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@@ -82,4 +82,4 @@ class DreiSatIE(InfoExtractor):
|
|||||||
info['url'] = formats[-1]['url']
|
info['url'] = formats[-1]['url']
|
||||||
info['ext'] = determine_ext(formats[-1]['url'])
|
info['ext'] = determine_ext(formats[-1]['url'])
|
||||||
|
|
||||||
return self.video_result(info)
|
return info
|
||||||
46
youtube_dl/extractor/ehow.py
Normal file
46
youtube_dl/extractor/ehow.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
determine_ext
|
||||||
|
)
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class EHowIE(InfoExtractor):
    """Extractor for ehow.com pages whose url ends in _<numeric id>."""

    IE_NAME = u'eHow'
    _VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
    _TEST = {
        u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
        u'file': u'12245069.flv',
        u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
        u'info_dict': {
            u"title": u"Hardwood Flooring Basics",
            u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
            u"uploader": u"Erick Nathan"
        }
    }

    def _real_extract(self, url):
        """Scrape the page for the media url, uploader and OpenGraph metadata."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The media url appears percent-encoded in a file= or source= parameter
        quoted_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
                                        webpage, u'video URL')
        final_url = compat_urllib_parse.unquote(quoted_url)
        uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
                                      webpage, u'uploader')
        # Drop the site name from the OpenGraph title
        title = self._og_search_title(webpage).replace(' | eHow', '')
        ext = determine_ext(final_url)

        info = {'_type': 'video'}
        info['id'] = video_id
        info['url'] = final_url
        info['ext'] = ext
        info['title'] = title
        info['thumbnail'] = self._og_search_thumbnail(webpage)
        info['description'] = self._og_search_description(webpage)
        info['uploader'] = uploader
        return info
|
||||||
|
|
||||||
@@ -36,11 +36,7 @@ class EscapistIE(InfoExtractor):
|
|||||||
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
|
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
|
||||||
webpage, u'description', fatal=False)
|
webpage, u'description', fatal=False)
|
||||||
|
|
||||||
imgUrl = self._html_search_regex('<meta property="og:image" content="([^"]*)"',
|
playerUrl = self._og_search_video_url(webpage, name='player url')
|
||||||
webpage, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
playerUrl = self._html_search_regex('<meta property="og:video" content="([^"]*)"',
|
|
||||||
webpage, u'player url')
|
|
||||||
|
|
||||||
title = self._html_search_regex('<meta name="title" content="([^"]*)"',
|
title = self._html_search_regex('<meta name="title" content="([^"]*)"',
|
||||||
webpage, u'player url').split(' : ')[-1]
|
webpage, u'player url').split(' : ')[-1]
|
||||||
@@ -70,7 +66,7 @@ class EscapistIE(InfoExtractor):
|
|||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'thumbnail': imgUrl,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': videoDesc,
|
'description': videoDesc,
|
||||||
'player_url': playerUrl,
|
'player_url': playerUrl,
|
||||||
}
|
}
|
||||||
|
|||||||
42
youtube_dl/extractor/exfm.py
Normal file
42
youtube_dl/extractor/exfm.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class ExfmIE(InfoExtractor):
    """Extractor for ex.fm/song/<id> pages."""

    IE_NAME = u'exfm'
    IE_DESC = u'ex.fm'
    _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
    # Every dot of the host is escaped, so that only api.soundcloud.com
    # stream urls are recognised (an unescaped '.' matches any character)
    _SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
    _TEST = {
        u'url': u'http://ex.fm/song/1bgtzg',
        u'file': u'1bgtzg.mp3',
        u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
        u'info_dict': {
            u"title": u"We Can't Stop",
            u"uploader": u"Miley Cyrus",
            u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
        }
    }

    def _real_extract(self, url):
        """Read the song description from the ex.fm api and return it as a one item list."""
        mobj = re.match(self._VALID_URL, url)
        song_id = mobj.group(1)
        info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
        webpage = self._download_webpage(info_url, song_id)
        song = json.loads(webpage)['song']

        soundcloud_match = re.match(self._SOUNDCLOUD_URL_, song['url'])
        if soundcloud_match is not None:
            # Soundcloud stream urls get a client_id appended; anything after
            # the matched part of the url is dropped
            song_url = soundcloud_match.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28"
        else:
            song_url = song['url']

        return [{
            'id': song_id,
            'url': song_url,
            'ext': 'mp3',
            'title': song['title'],
            'thumbnail': song['image']['large'],
            'uploader': song['artist'],
            'view_count': song['loved_count'],
        }]
|
||||||
@@ -47,21 +47,12 @@ class FlickrIE(InfoExtractor):
|
|||||||
raise ExtractorError(u'Unable to extract video url')
|
raise ExtractorError(u'Unable to extract video url')
|
||||||
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
|
|
||||||
webpage, u'video title')
|
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
|
|
||||||
webpage, u'description', fatal=False)
|
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
|
|
||||||
webpage, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': video_title,
|
'title': self._og_search_title(webpage),
|
||||||
'description': video_description,
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
}]
|
}]
|
||||||
|
|||||||
36
youtube_dl/extractor/freesound.py
Normal file
36
youtube_dl/extractor/freesound.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
class FreesoundIE(InfoExtractor):
    """Information extractor for individual sound pages on freesound.org."""

    _VALID_URL = r'(?:https?://)?(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)'
    _TEST = {
        u'url': u'http://www.freesound.org/people/miklovan/sounds/194503/',
        u'file': u'194503.mp3',
        u'md5': u'12280ceb42c81f19a515c745eae07650',
        u'info_dict': {
            u"title": u"gulls in the city.wav",
            u"uploader": u"miklovan",
            u'description': u'the sounds of seagulls in the city',
        },
    }

    def _real_extract(self, url):
        """Download the sound page at *url* and return ``[info_dict]``.

        Title and description are scraped from the page markup; the media
        URL and the uploader come from the page's Open Graph properties.
        The description lookup is non-fatal, the other lookups are fatal.
        """
        sound_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, sound_id)

        title_re = r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>'
        sound_title = self._html_search_regex(
            title_re, page, 'music title', flags=re.DOTALL)

        audio_url = self._og_search_property('audio', page, 'music url')

        description_re = r'<div id="sound_description">(.*?)</div>'
        summary = self._html_search_regex(
            description_re, page, 'description', fatal=False, flags=re.DOTALL)

        # Fill the entry key by key, in the same order the lookups have
        # always been performed (uploader before extension detection).
        entry = {
            'id': sound_id,
            'title': sound_title,
            'url': audio_url,
        }
        entry['uploader'] = self._og_search_property(
            'audio:artist', page, 'music uploader')
        entry['ext'] = determine_ext(audio_url)
        entry['description'] = summary
        return [entry]
|
||||||
@@ -27,14 +27,11 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
|
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
|
||||||
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
|
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
|
|
||||||
webpage, u'description', fatal=False, flags=re.DOTALL)
|
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': video_description,
|
'description': self._og_search_description(webpage),
|
||||||
}
|
}
|
||||||
return [info]
|
return [info]
|
||||||
|
|||||||
@@ -4,14 +4,15 @@ import xml.etree.ElementTree
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
compat_urllib_parse,
|
||||||
)
|
)
|
||||||
|
|
||||||
class GameSpotIE(InfoExtractor):
|
class GameSpotIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/'
|
_VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
|
u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
|
||||||
u"file": u"6410818.mp4",
|
u"file": u"6410818.mp4",
|
||||||
u"md5": u"5569d64ca98db01f0177c934fe8c1e9b",
|
u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
|
||||||
u"info_dict": {
|
u"info_dict": {
|
||||||
u"title": u"Arma III - Community Guide: SITREP I",
|
u"title": u"Arma III - Community Guide: SITREP I",
|
||||||
u"upload_date": u"20130627",
|
u"upload_date": u"20130627",
|
||||||
@@ -21,13 +22,22 @@ class GameSpotIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(3).split("-")[-1]
|
page_id = mobj.group('page_id')
|
||||||
info_url = "http://www.gamespot.com/pages/video_player/xml.php?id="+str(video_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
|
video_id = self._html_search_regex([r'"og:video" content=".*?\?id=(\d+)"',
|
||||||
|
r'http://www\.gamespot\.com/videoembed/(\d+)'],
|
||||||
|
webpage, 'video id')
|
||||||
|
data = compat_urllib_parse.urlencode({'id': video_id, 'newplayer': '1'})
|
||||||
|
info_url = 'http://www.gamespot.com/pages/video_player/xml.php?' + data
|
||||||
info_xml = self._download_webpage(info_url, video_id)
|
info_xml = self._download_webpage(info_url, video_id)
|
||||||
doc = xml.etree.ElementTree.fromstring(info_xml)
|
doc = xml.etree.ElementTree.fromstring(info_xml)
|
||||||
clip_el = doc.find('./playList/clip')
|
clip_el = doc.find('./playList/clip')
|
||||||
|
|
||||||
video_url = clip_el.find('./URI').text
|
http_urls = [{'url': node.find('filePath').text,
|
||||||
|
'rate': int(node.find('rate').text)}
|
||||||
|
for node in clip_el.find('./httpURI')]
|
||||||
|
best_quality = sorted(http_urls, key=lambda f: f['rate'])[-1]
|
||||||
|
video_url = best_quality['url']
|
||||||
title = clip_el.find('./title').text
|
title = clip_el.find('./title').text
|
||||||
ext = video_url.rpartition('.')[2]
|
ext = video_url.rpartition('.')[2]
|
||||||
thumbnail_url = clip_el.find('./screenGrabURI').text
|
thumbnail_url = clip_el.find('./screenGrabURI').text
|
||||||
|
|||||||
@@ -1,68 +1,36 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .mtv import MTVIE, _media_xml_tag
|
||||||
from ..utils import (
|
|
||||||
compat_urllib_parse,
|
|
||||||
|
|
||||||
ExtractorError,
|
class GametrailersIE(MTVIE):
|
||||||
)
|
"""
|
||||||
|
Gametrailers use the same videos system as MTVIE, it just changes the feed
|
||||||
class GametrailersIE(InfoExtractor):
|
url, where the uri is and the method to get the thumbnails.
|
||||||
|
"""
|
||||||
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
|
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
|
||||||
u'file': u'zbvr8i.flv',
|
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
|
||||||
u'md5': u'c3edbc995ab4081976e16779bd96a878',
|
u'md5': u'4c8e67681a0ea7ec241e8c09b3ea8cf7',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"E3 2013: Debut Trailer"
|
u'title': u'E3 2013: Debut Trailer',
|
||||||
|
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
|
||||||
},
|
},
|
||||||
u'skip': u'Requires rtmpdump'
|
|
||||||
}
|
}
|
||||||
|
# Overwrite MTVIE properties we don't want
|
||||||
|
_TESTS = []
|
||||||
|
|
||||||
|
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
|
||||||
|
|
||||||
|
def _get_thumbnail_url(self, uri, itemdoc):
|
||||||
|
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||||
|
return itemdoc.find(search_path).attrib['url']
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
video_type = mobj.group('type')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
if video_type == 'full-episodes':
|
mgid = self._search_regex([r'data-video="(?P<mgid>mgid:.*?)"',
|
||||||
mgid_re = r'data-video="(?P<mgid>mgid:.*?)"'
|
r'data-contentId=\'(?P<mgid>mgid:.*?)\''],
|
||||||
else:
|
webpage, u'mgid')
|
||||||
mgid_re = r'data-contentId=\'(?P<mgid>mgid:.*?)\''
|
return self._get_videos_info(mgid)
|
||||||
mgid = self._search_regex(mgid_re, webpage, u'mgid')
|
|
||||||
data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'})
|
|
||||||
|
|
||||||
info_page = self._download_webpage('http://www.gametrailers.com/feeds/mrss?' + data,
|
|
||||||
video_id, u'Downloading video info')
|
|
||||||
links_webpage = self._download_webpage('http://www.gametrailers.com/feeds/mediagen/?' + data,
|
|
||||||
video_id, u'Downloading video urls info')
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
|
||||||
info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
|
|
||||||
<description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
|
|
||||||
<image>.*
|
|
||||||
<url>(?P<thumb>.*?)</url>.*
|
|
||||||
</image>'''
|
|
||||||
|
|
||||||
m_info = re.search(info_re, info_page, re.VERBOSE|re.DOTALL)
|
|
||||||
if m_info is None:
|
|
||||||
raise ExtractorError(u'Unable to extract video info')
|
|
||||||
video_title = m_info.group('title')
|
|
||||||
video_description = m_info.group('description')
|
|
||||||
video_thumb = m_info.group('thumb')
|
|
||||||
|
|
||||||
m_urls = list(re.finditer(r'<src>(?P<url>.*)</src>', links_webpage))
|
|
||||||
if m_urls is None or len(m_urls) == 0:
|
|
||||||
raise ExtractorError(u'Unable to extract video url')
|
|
||||||
# They are sorted from worst to best quality
|
|
||||||
video_url = m_urls[-1].group('url')
|
|
||||||
|
|
||||||
return {'url': video_url,
|
|
||||||
'id': video_id,
|
|
||||||
'title': video_title,
|
|
||||||
# Videos are actually flv not mp4
|
|
||||||
'ext': 'flv',
|
|
||||||
'thumbnail': video_thumb,
|
|
||||||
'description': video_description,
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -9,20 +11,34 @@ from ..utils import (
|
|||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
from .brightcove import BrightcoveIE
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
IE_DESC = u'Generic downloader that works on some sites'
|
IE_DESC = u'Generic downloader that works on some sites'
|
||||||
_VALID_URL = r'.*'
|
_VALID_URL = r'.*'
|
||||||
IE_NAME = u'generic'
|
IE_NAME = u'generic'
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
{
|
||||||
u'file': u'13601338388002.mp4',
|
u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
||||||
u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
|
u'file': u'13601338388002.mp4',
|
||||||
u'info_dict': {
|
u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
|
||||||
u"uploader": u"www.hodiho.fr",
|
u'info_dict': {
|
||||||
u"title": u"R\u00e9gis plante sa Jeep"
|
u"uploader": u"www.hodiho.fr",
|
||||||
}
|
u"title": u"R\u00e9gis plante sa Jeep"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/',
|
||||||
|
u'file': u'2371591881001.mp4',
|
||||||
|
u'md5': u'9e80619e0a94663f0bdc849b4566af19',
|
||||||
|
u'note': u'Test Brightcove downloads and detection in GenericIE',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
|
||||||
|
u'uploader': u'8TV',
|
||||||
|
u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
def report_download_webpage(self, video_id):
|
||||||
"""Report webpage download."""
|
"""Report webpage download."""
|
||||||
@@ -103,6 +119,13 @@ class GenericIE(InfoExtractor):
|
|||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
# Look for Brightcove:
|
||||||
|
m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
|
||||||
|
if m_brightcove is not None:
|
||||||
|
self.to_screen(u'Brightcove video detected.')
|
||||||
|
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
|
||||||
|
return self.url_result(bc_url, 'Brightcove')
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
|||||||
@@ -34,15 +34,11 @@ class HotNewHipHopIE(InfoExtractor):
|
|||||||
video_title = self._html_search_regex(r"<title>(.*)</title>",
|
video_title = self._html_search_regex(r"<title>(.*)</title>",
|
||||||
webpage_src, u'title')
|
webpage_src, u'title')
|
||||||
|
|
||||||
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
|
|
||||||
thumbnail = self._html_search_regex(r'"og:image" content="(.*)"',
|
|
||||||
webpage_src, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
results = [{
|
results = [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url' : video_url,
|
'url' : video_url,
|
||||||
'title' : video_title,
|
'title' : video_title,
|
||||||
'thumbnail' : thumbnail,
|
'thumbnail' : self._og_search_thumbnail(webpage_src),
|
||||||
'ext' : 'mp3',
|
'ext' : 'mp3',
|
||||||
}]
|
}]
|
||||||
return results
|
return results
|
||||||
91
youtube_dl/extractor/ign.py
Normal file
91
youtube_dl/extractor/ign.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class IGNIE(InfoExtractor):
    """
    Extractor for some of the IGN sites, like www.ign.com, es.ign.com and
    de.ign.com. Some videos of it.ign.com are also supported.
    """

    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
    IE_NAME = u'ign.com'

    # The video id is substituted into this template to get the JSON config.
    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
    # Candidate patterns for the description; subclasses may override this.
    _DESCRIPTION_RE = [
        r'<span class="page-object-description">(.+?)</span>',
        r'id="my_show_video">.*?<p>(.*?)</p>',
    ]

    _TEST = {
        u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
        u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
        u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
        u'info_dict': {
            u'title': u'The Last of Us Review',
            u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
        }
    }

    def _find_video_id(self, webpage):
        """Search *webpage* for the video id using several known markups."""
        res_id = [
            r'data-video-id="(.+?)"',
            r'<object id="vid_(.+?)"',
            # The dot before the extension is escaped so that it only
            # matches a literal ".jpg" suffix.
            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+\.jpg"',
        ]
        return self._search_regex(res_id, webpage, 'video id')

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The page is downloaded to find the video id and the description;
        every other field comes from the JSON config (_get_video_info).
        """
        mobj = re.match(self._VALID_URL, url)
        name_or_id = mobj.group('name_or_id')
        webpage = self._download_webpage(url, name_or_id)
        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        description = self._html_search_regex(self._DESCRIPTION_RE,
                                              webpage, 'video description',
                                              flags=re.DOTALL)
        result['description'] = description
        return result

    def _get_video_info(self, video_id):
        """Download the JSON config of *video_id* and build the info dict."""
        config_url = self._CONFIG_URL_TEMPLATE % video_id
        config = json.loads(self._download_webpage(config_url, video_id,
                                                   u'Downloading video info'))
        media = config['playlist']['media']
        video_url = media['url']

        return {
            # The id reported is the one from the config metadata, not the
            # argument used to build the config URL.
            'id': media['metadata']['videoId'],
            'url': video_url,
            'ext': determine_ext(video_url),
            'title': media['metadata']['title'],
            # The poster URL carries a "{size}" placeholder.
            'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class OneUPIE(IGNIE):
    """Extractor for 1up.com, it uses the ign videos system."""

    # Dots in the host name are escaped so they only match literal dots.
    _VALID_URL = r'https?://gamevideos\.1up\.com/video/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'

    # Replaces the parent's description patterns with the 1up.com markup.
    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

    _TEST = {
        u'url': u'http://gamevideos.1up.com/video/id/34976',
        u'file': u'34976.mp4',
        u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
        u'info_dict': {
            u'title': u'Sniper Elite V2 - Trailer',
            u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
        }
    }

    def _real_extract(self, url):
        """Run the parent extraction, then report the id found in *url*.

        The parent class fills 'id' from the downloaded config; here it is
        overwritten with the ``name_or_id`` group of the 1up.com URL.
        """
        mobj = re.match(self._VALID_URL, url)
        # Named page_id rather than "id" to avoid shadowing the builtin.
        page_id = mobj.group('name_or_id')
        result = super(OneUPIE, self)._real_extract(url)
        result['id'] = page_id
        return result
|
||||||
@@ -5,12 +5,13 @@ from .common import InfoExtractor
|
|||||||
class InstagramIE(InfoExtractor):
|
class InstagramIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
|
_VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://instagram.com/p/aye83DjauH/#',
|
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||||
u'file': u'aye83DjauH.mp4',
|
u'file': u'aye83DjauH.mp4',
|
||||||
u'md5': u'0d2da106a9d2631273e192b372806516',
|
u'md5': u'0d2da106a9d2631273e192b372806516',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"uploader_id": u"naomipq",
|
u"uploader_id": u"naomipq",
|
||||||
u"title": u"Video by naomipq"
|
u"title": u"Video by naomipq",
|
||||||
|
u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -18,25 +19,17 @@ class InstagramIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._html_search_regex(
|
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||||
r'<meta property="og:video" content="(.+?)"',
|
webpage, u'uploader id', fatal=False)
|
||||||
webpage, u'video URL')
|
desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
|
||||||
thumbnail_url = self._html_search_regex(
|
fatal=False)
|
||||||
r'<meta property="og:image" content="(.+?)" />',
|
|
||||||
webpage, u'thumbnail URL', fatal=False)
|
|
||||||
html_title = self._html_search_regex(
|
|
||||||
r'<title>(.+?)</title>',
|
|
||||||
webpage, u'title', flags=re.DOTALL)
|
|
||||||
title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
|
|
||||||
uploader_id = self._html_search_regex(r'content="(.*?)\'s video on Instagram',
|
|
||||||
webpage, u'uploader name', fatal=False)
|
|
||||||
ext = 'mp4'
|
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': self._og_search_video_url(webpage),
|
||||||
'ext': ext,
|
'ext': 'mp4',
|
||||||
'title': title,
|
'title': u'Video by %s' % uploader_id,
|
||||||
'thumbnail': thumbnail_url,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id' : uploader_id
|
'uploader_id' : uploader_id,
|
||||||
|
'description': desc,
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -24,8 +24,7 @@ class KeekIE(InfoExtractor):
|
|||||||
thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
|
thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
|
video_title = self._og_search_title(webpage)
|
||||||
webpage, u'title')
|
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
|
uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
|
||||||
webpage, u'uploader', fatal=False)
|
webpage, u'uploader', fatal=False)
|
||||||
|
|||||||
@@ -33,11 +33,9 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
video_url = self._search_regex(r'file: "(.*?)",',
|
video_url = self._search_regex(r'file: "(.*?)",',
|
||||||
webpage, u'video URL')
|
webpage, u'video URL')
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
|
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
||||||
webpage, u'title').replace('LiveLeak.com -', '').strip()
|
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
|
video_description = self._og_search_description(webpage)
|
||||||
webpage, u'description', fatal=False)
|
|
||||||
|
|
||||||
video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
|
video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
|
||||||
webpage, u'uploader', fatal=False)
|
webpage, u'uploader', fatal=False)
|
||||||
|
|||||||
52
youtube_dl/extractor/livestream.py
Normal file
52
youtube_dl/extractor/livestream.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import compat_urllib_parse_urlparse, compat_urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class LivestreamIE(InfoExtractor):
    """Information extractor for new.livestream.com videos and event pages.

    A URL with a ``/videos/<id>`` part yields a single video; a URL without
    it is treated as an event page and yields a playlist.
    """

    # Dots in the host name are escaped so they only match literal dots.
    _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
    _TEST = {
        u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        u'file': u'4719370.mp4',
        u'md5': u'0d2186e3187d185a04b3cdd02b828836',
        u'info_dict': {
            u'title': u'Live from Webster Hall NYC',
            u'upload_date': u'20121012',
        }
    }

    def _extract_video_info(self, video_data):
        """Build an info dict from one video entry of the API answer."""
        # Use the HD progressive URL when present, otherwise the plain one.
        video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
        return {
            'id': video_data['id'],
            'url': video_url,
            'ext': 'mp4',
            'title': video_data['caption'],
            'thumbnail': video_data['thumbnail_url'],
            # Drop the dashes and keep the first 8 characters of the
            # timestamp to get the upload date.
            'upload_date': video_data['updated_at'].replace('-', '')[:8],
        }

    def _real_extract(self, url):
        """Return a playlist for an event page or an info dict for a video."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        event_name = mobj.group('event_name')
        webpage = self._download_webpage(url, video_id or event_name)

        if video_id is None:
            # This is an event page: the API URL is embedded in the page.
            api_url = self._search_regex(r'event_design_eventId: \'(.+?)\'',
                                         webpage, 'api url')
            info = json.loads(self._download_webpage(api_url, event_name,
                                                     u'Downloading event info'))
            # Only feed entries of type "video" are extracted.
            videos = [self._extract_video_info(video_data['data'])
                      for video_data in info['feed']['data']
                      if video_data['type'] == u'video']
            return self.playlist_result(videos, info['id'], info['full_name'])

        # Single video: the og:video player URL carries the API URL in its
        # "play_url" query parameter, with a ".smil" part that is removed.
        og_video = self._og_search_video_url(webpage, name=u'player url')
        query_str = compat_urllib_parse_urlparse(og_video).query
        query = compat_urlparse.parse_qs(query_str)
        api_url = query['play_url'][0].replace('.smil', '')
        info = json.loads(self._download_webpage(api_url, video_id,
                                                 u'Downloading video info'))
        return self._extract_video_info(info)
|
||||||
@@ -9,7 +9,7 @@ from ..utils import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -20,7 +20,7 @@ class MetacafeIE(InfoExtractor):
|
|||||||
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
||||||
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
||||||
IE_NAME = u'metacafe'
|
IE_NAME = u'metacafe'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
u"add_ie": ["Youtube"],
|
u"add_ie": ["Youtube"],
|
||||||
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
|
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
|
||||||
u"file": u"_aUehQsCQtM.flv",
|
u"file": u"_aUehQsCQtM.flv",
|
||||||
@@ -31,7 +31,16 @@ class MetacafeIE(InfoExtractor):
|
|||||||
u"uploader": u"PBS",
|
u"uploader": u"PBS",
|
||||||
u"uploader_id": u"PBS"
|
u"uploader_id": u"PBS"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
|
||||||
|
u"file": u"an-dVVXnuY7Jh77J.mp4",
|
||||||
|
u"info_dict": {
|
||||||
|
u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
|
||||||
|
u"uploader": u"anyclip",
|
||||||
|
u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
def report_disclaimer(self):
|
def report_disclaimer(self):
|
||||||
@@ -73,14 +82,16 @@ class MetacafeIE(InfoExtractor):
|
|||||||
return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
|
return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
webpage = self._download_webpage('http://www.metacafe.com/watch/%s/' % video_id, video_id)
|
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||||
|
req.headers['Cookie'] = 'flashVersion=0;'
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
# Extract URL, uploader and title from webpage
|
# Extract URL, uploader and title from webpage
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
|
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
mediaURL = compat_urllib_parse.unquote(mobj.group(1))
|
mediaURL = compat_urllib_parse.unquote(mobj.group(1))
|
||||||
video_extension = mediaURL[-3:]
|
video_ext = mediaURL[-3:]
|
||||||
|
|
||||||
# Extract gdaKey if available
|
# Extract gdaKey if available
|
||||||
mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
|
mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
|
||||||
@@ -90,34 +101,37 @@ class MetacafeIE(InfoExtractor):
|
|||||||
gdaKey = mobj.group(1)
|
gdaKey = mobj.group(1)
|
||||||
video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
|
video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
|
||||||
else:
|
else:
|
||||||
mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
|
mobj = re.search(r'<video src="([^"]+)"', webpage)
|
||||||
if mobj is None:
|
if mobj:
|
||||||
raise ExtractorError(u'Unable to extract media URL')
|
video_url = mobj.group(1)
|
||||||
vardict = compat_parse_qs(mobj.group(1))
|
video_ext = 'mp4'
|
||||||
if 'mediaData' not in vardict:
|
else:
|
||||||
raise ExtractorError(u'Unable to extract media URL')
|
mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
|
||||||
mobj = re.search(r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
|
if mobj is None:
|
||||||
if mobj is None:
|
raise ExtractorError(u'Unable to extract media URL')
|
||||||
raise ExtractorError(u'Unable to extract media URL')
|
vardict = compat_parse_qs(mobj.group(1))
|
||||||
mediaURL = mobj.group('mediaURL').replace('\\/', '/')
|
if 'mediaData' not in vardict:
|
||||||
video_extension = mediaURL[-3:]
|
raise ExtractorError(u'Unable to extract media URL')
|
||||||
video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
|
mobj = re.search(r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
|
||||||
|
if mobj is None:
|
||||||
|
raise ExtractorError(u'Unable to extract media URL')
|
||||||
|
mediaURL = mobj.group('mediaURL').replace('\\/', '/')
|
||||||
|
video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
|
||||||
|
video_ext = determine_ext(video_url)
|
||||||
|
|
||||||
mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
|
video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
|
||||||
if mobj is None:
|
description = self._og_search_description(webpage)
|
||||||
raise ExtractorError(u'Unable to extract title')
|
video_uploader = self._html_search_regex(
|
||||||
video_title = mobj.group(1).decode('utf-8')
|
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
|
||||||
|
webpage, u'uploader nickname', fatal=False)
|
||||||
|
|
||||||
mobj = re.search(r'submitter=(.*?);', webpage)
|
return {
|
||||||
if mobj is None:
|
'_type': 'video',
|
||||||
raise ExtractorError(u'Unable to extract uploader nickname')
|
'id': video_id,
|
||||||
video_uploader = mobj.group(1)
|
'url': video_url,
|
||||||
|
'description': description,
|
||||||
return [{
|
'uploader': video_uploader,
|
||||||
'id': video_id.decode('utf-8'),
|
|
||||||
'url': video_url.decode('utf-8'),
|
|
||||||
'uploader': video_uploader.decode('utf-8'),
|
|
||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_extension.decode('utf-8'),
|
'ext': video_ext,
|
||||||
}]
|
}
|
||||||
|
|||||||
@@ -1,28 +1,110 @@
|
|||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
compat_urllib_parse,
|
||||||
compat_str,
|
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_request,
|
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _media_xml_tag(tag):
|
||||||
|
return '{http://search.yahoo.com/mrss/}%s' % tag
|
||||||
|
|
||||||
class MTVIE(InfoExtractor):
|
class MTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
|
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
|
||||||
_WORKING = False
|
|
||||||
|
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
|
||||||
|
u'file': u'853555.mp4',
|
||||||
|
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
|
||||||
|
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
|
||||||
|
u'file': u'USCJY1331283.mp4',
|
||||||
|
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Everything Has Changed',
|
||||||
|
u'upload_date': u'20130606',
|
||||||
|
u'uploader': u'Taylor Swift',
|
||||||
|
},
|
||||||
|
u'skip': u'VEVO is only available in some countries',
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _id_from_uri(uri):
|
||||||
|
return uri.split(':')[-1]
|
||||||
|
|
||||||
|
# This was originally implemented for ComedyCentral, but it also works here
|
||||||
|
@staticmethod
|
||||||
|
def _transform_rtmp_url(rtmp_video_url):
|
||||||
|
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
||||||
|
if not m:
|
||||||
|
raise ExtractorError(u'Cannot transform RTMP url')
|
||||||
|
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
||||||
|
return base + m.group('finalid')
|
||||||
|
|
||||||
|
def _get_thumbnail_url(self, uri, itemdoc):
|
||||||
|
return 'http://mtv.mtvnimages.com/uri/' + uri
|
||||||
|
|
||||||
|
def _extract_video_url(self, metadataXml):
|
||||||
|
if '/error_country_block.swf' in metadataXml:
|
||||||
|
raise ExtractorError(u'This video is not available from your country.', expected=True)
|
||||||
|
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
|
||||||
|
renditions = mdoc.findall('.//rendition')
|
||||||
|
|
||||||
|
# For now, always pick the highest quality.
|
||||||
|
rendition = renditions[-1]
|
||||||
|
|
||||||
|
try:
|
||||||
|
_,_,ext = rendition.attrib['type'].partition('/')
|
||||||
|
format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
|
||||||
|
rtmp_video_url = rendition.find('./src').text
|
||||||
|
except KeyError:
|
||||||
|
raise ExtractorError('Invalid rendition field.')
|
||||||
|
video_url = self._transform_rtmp_url(rtmp_video_url)
|
||||||
|
return {'ext': ext, 'url': video_url, 'format': format}
|
||||||
|
|
||||||
|
def _get_video_info(self, itemdoc):
|
||||||
|
uri = itemdoc.find('guid').text
|
||||||
|
video_id = self._id_from_uri(uri)
|
||||||
|
self.report_extraction(video_id)
|
||||||
|
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
|
||||||
|
if 'acceptMethods' not in mediagen_url:
|
||||||
|
mediagen_url += '&acceptMethods=fms'
|
||||||
|
mediagen_page = self._download_webpage(mediagen_url, video_id,
|
||||||
|
u'Downloading video urls')
|
||||||
|
video_info = self._extract_video_url(mediagen_page)
|
||||||
|
|
||||||
|
description_node = itemdoc.find('description')
|
||||||
|
if description_node is not None:
|
||||||
|
description = description_node.text
|
||||||
|
else:
|
||||||
|
description = None
|
||||||
|
video_info.update({'title': itemdoc.find('title').text,
|
||||||
|
'id': video_id,
|
||||||
|
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||||
|
'description': description,
|
||||||
|
})
|
||||||
|
return video_info
|
||||||
|
|
||||||
|
def _get_videos_info(self, uri):
|
||||||
|
video_id = self._id_from_uri(uri)
|
||||||
|
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||||
|
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
|
||||||
|
u'Downloading info')
|
||||||
|
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
|
||||||
|
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
if not mobj.group('proto'):
|
|
||||||
url = 'http://' + url
|
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('videoid')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
@@ -35,46 +117,5 @@ class MTVIE(InfoExtractor):
|
|||||||
self.to_screen(u'Vevo video detected: %s' % vevo_id)
|
self.to_screen(u'Vevo video detected: %s' % vevo_id)
|
||||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||||
|
|
||||||
#song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
|
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
|
||||||
# webpage, u'song name', fatal=False)
|
return self._get_videos_info(uri)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
|
|
||||||
webpage, u'title')
|
|
||||||
|
|
||||||
mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
|
|
||||||
webpage, u'mtvn_uri', fatal=False)
|
|
||||||
|
|
||||||
content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
|
|
||||||
webpage, u'content id', fatal=False)
|
|
||||||
|
|
||||||
videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
|
|
||||||
self.report_extraction(video_id)
|
|
||||||
request = compat_urllib_request.Request(videogen_url)
|
|
||||||
try:
|
|
||||||
metadataXml = compat_urllib_request.urlopen(request).read()
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to download video metadata: %s' % compat_str(err))
|
|
||||||
|
|
||||||
mdoc = xml.etree.ElementTree.fromstring(metadataXml)
|
|
||||||
renditions = mdoc.findall('.//rendition')
|
|
||||||
|
|
||||||
# For now, always pick the highest quality.
|
|
||||||
rendition = renditions[-1]
|
|
||||||
|
|
||||||
try:
|
|
||||||
_,_,ext = rendition.attrib['type'].partition('/')
|
|
||||||
format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
|
|
||||||
video_url = rendition.find('./src').text
|
|
||||||
except KeyError:
|
|
||||||
raise ExtractorError('Invalid rendition field.')
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'upload_date': None,
|
|
||||||
'title': video_title,
|
|
||||||
'ext': ext,
|
|
||||||
'format': format,
|
|
||||||
}
|
|
||||||
|
|
||||||
return [info]
|
|
||||||
|
|||||||
@@ -30,8 +30,7 @@ class NBAIE(InfoExtractor):
|
|||||||
video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
||||||
|
|
||||||
shortened_video_id = video_id.rpartition('/')[2]
|
shortened_video_id = video_id.rpartition('/')[2]
|
||||||
title = self._html_search_regex(r'<meta property="og:title" content="(.*?)"',
|
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
|
||||||
webpage, 'title', default=shortened_video_id).replace('NBA.com: ', '')
|
|
||||||
|
|
||||||
# It isn't there in the HTML it returns to us
|
# It isn't there in the HTML it returns to us
|
||||||
# uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
|
# uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
|
||||||
|
|||||||
67
youtube_dl/extractor/sina.py
Normal file
67
youtube_dl/extractor/sina.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SinaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
|
||||||
|
(
|
||||||
|
(.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=))(?P<id>\d+?)($|&))))
|
||||||
|
|
|
||||||
|
# This is used by external sites like Weibo
|
||||||
|
(api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
|
||||||
|
u'file': u'110028898.flv',
|
||||||
|
u'md5': u'd65dd22ddcf44e38ce2bf58a10c3e71f',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
|
||||||
|
|
||||||
|
def _extract_video(self, video_id):
|
||||||
|
data = compat_urllib_parse.urlencode({'vid': video_id})
|
||||||
|
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
|
||||||
|
video_id, u'Downloading video url')
|
||||||
|
image_page = self._download_webpage(
|
||||||
|
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
||||||
|
video_id, u'Downloading thumbnail info')
|
||||||
|
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
|
||||||
|
|
||||||
|
return {'id': video_id,
|
||||||
|
'url': url_doc.find('./durl/url').text,
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': url_doc.find('./vname').text,
|
||||||
|
'thumbnail': image_page.split('=')[1],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
if mobj.group('token') is not None:
|
||||||
|
# The video id is in the redirected url
|
||||||
|
self.to_screen(u'Getting video id')
|
||||||
|
request = compat_urllib_request.Request(url)
|
||||||
|
request.get_method = lambda: 'HEAD'
|
||||||
|
(_, urlh) = self._download_webpage_handle(request, 'NA', False)
|
||||||
|
return self._real_extract(urlh.geturl())
|
||||||
|
elif video_id is None:
|
||||||
|
pseudo_id = mobj.group('pseudo_id')
|
||||||
|
webpage = self._download_webpage(url, pseudo_id)
|
||||||
|
video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, u'video id')
|
||||||
|
|
||||||
|
return self._extract_video(video_id)
|
||||||
@@ -19,7 +19,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
of the stream token and uid
|
of the stream token and uid
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$'
|
||||||
IE_NAME = u'soundcloud'
|
IE_NAME = u'soundcloud'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||||
|
|||||||
@@ -18,12 +18,6 @@ class StatigramIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._html_search_regex(
|
|
||||||
r'<meta property="og:video:secure_url" content="(.+?)">',
|
|
||||||
webpage, u'video URL')
|
|
||||||
thumbnail_url = self._html_search_regex(
|
|
||||||
r'<meta property="og:image" content="(.+?)" />',
|
|
||||||
webpage, u'thumbnail URL', fatal=False)
|
|
||||||
html_title = self._html_search_regex(
|
html_title = self._html_search_regex(
|
||||||
r'<title>(.+?)</title>',
|
r'<title>(.+?)</title>',
|
||||||
webpage, u'title')
|
webpage, u'title')
|
||||||
@@ -34,9 +28,9 @@ class StatigramIE(InfoExtractor):
|
|||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': self._og_search_video_url(webpage),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail_url,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id' : uploader_id
|
'uploader_id' : uploader_id
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -23,14 +23,16 @@ class SteamIE(InfoExtractor):
|
|||||||
u"file": u"81300.flv",
|
u"file": u"81300.flv",
|
||||||
u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
|
u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
|
||||||
u"info_dict": {
|
u"info_dict": {
|
||||||
u"title": u"Terraria 1.1 Trailer"
|
u"title": u"Terraria 1.1 Trailer",
|
||||||
|
u'playlist_index': 1,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"file": u"80859.flv",
|
u"file": u"80859.flv",
|
||||||
u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
|
u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
|
||||||
u"info_dict": {
|
u"info_dict": {
|
||||||
u"title": u"Terraria Trailer"
|
u"title": u"Terraria Trailer",
|
||||||
|
u'playlist_index': 2,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -30,15 +30,6 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
|
|
||||||
webpage, u'title')
|
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
|
|
||||||
webpage, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
|
|
||||||
webpage, u'description', fatal=False)
|
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||||
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
||||||
|
|
||||||
@@ -49,7 +40,7 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': video_title,
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': video_description,
|
'description': self._og_search_description(webpage),
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ class TEDIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
|
webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
|
||||||
self.report_extraction(video_name)
|
self.report_extraction(video_name)
|
||||||
# If the url includes the language we get the title translated
|
# If the url includes the language we get the title translated
|
||||||
title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
|
title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
|
json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
|
||||||
webpage, 'json data')
|
webpage, 'json data')
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ class TF1IE(InfoExtractor):
|
|||||||
TF1 uses the wat.tv player, currently it can only download videos with the
|
TF1 uses the wat.tv player, currently it can only download videos with the
|
||||||
html5 player enabled, it cannot download HD videos.
|
html5 player enabled, it cannot download HD videos.
|
||||||
"""
|
"""
|
||||||
|
_WORKING = False
|
||||||
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
|
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||||
|
|||||||
47
youtube_dl/extractor/thisav.py
Normal file
47
youtube_dl/extractor/thisav.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
#coding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
class ThisAVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
|
||||||
|
_TEST = {
|
||||||
|
u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
|
||||||
|
u"file": u"47734.flv",
|
||||||
|
u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
|
||||||
|
u"info_dict": {
|
||||||
|
u"title": u"高樹マリア - Just fit",
|
||||||
|
u"uploader": u"dj7970",
|
||||||
|
u"uploader_id": u"dj7970"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
|
||||||
|
video_url = self._html_search_regex(
|
||||||
|
r"addVariable\('file','([^']+)'\);", webpage, u'video url')
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
||||||
|
webpage, u'uploader name', fatal=False)
|
||||||
|
uploader_id = self._html_search_regex(
|
||||||
|
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
||||||
|
webpage, u'uploader id', fatal=False)
|
||||||
|
ext = determine_ext(video_url)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'video',
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'title': title,
|
||||||
|
'ext': ext,
|
||||||
|
}
|
||||||
@@ -24,10 +24,7 @@ class TrailerAddictIE(InfoExtractor):
|
|||||||
webpage, 'video title').replace(' - Trailer Addict','')
|
webpage, 'video title').replace(' - Trailer Addict','')
|
||||||
view_count = self._search_regex(r'Views: (.+?)<br />',
|
view_count = self._search_regex(r'Views: (.+?)<br />',
|
||||||
webpage, 'Views Count')
|
webpage, 'Views Count')
|
||||||
description = self._search_regex(r'<meta property="og:description" content="(.+?)" />',
|
video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
|
||||||
webpage, 'video description')
|
|
||||||
video_id = self._search_regex(r'<meta property="og:video" content="(.+?)" />',
|
|
||||||
webpage, 'Video id').split('=')[1]
|
|
||||||
|
|
||||||
info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id))
|
info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id))
|
||||||
info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
|
info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
|
||||||
@@ -44,6 +41,6 @@ class TrailerAddictIE(InfoExtractor):
|
|||||||
'ext' : ext,
|
'ext' : ext,
|
||||||
'title' : title,
|
'title' : title,
|
||||||
'thumbnail' : thumbnail_url,
|
'thumbnail' : thumbnail_url,
|
||||||
'description' : description,
|
'description' : self._og_search_description(webpage),
|
||||||
'view_count' : view_count,
|
'view_count' : view_count,
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -22,8 +22,6 @@ class TutvIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<meta property="og:title" content="(.*?)">', webpage, u'title')
|
|
||||||
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
|
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
|
||||||
|
|
||||||
data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
|
data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
|
||||||
@@ -36,6 +34,6 @@ class TutvIE(InfoExtractor):
|
|||||||
'id': internal_id,
|
'id': internal_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'title': title,
|
'title': self._og_search_title(webpage),
|
||||||
}
|
}
|
||||||
return [info]
|
return [info]
|
||||||
|
|||||||
47
youtube_dl/extractor/veoh.py
Normal file
47
youtube_dl/extractor/veoh.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
class VeohIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://www\.veoh\.com/watch/v(?P<id>\d*)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.veoh.com/watch/v56314296nk7Zdmz3',
|
||||||
|
u'file': u'56314296.mp4',
|
||||||
|
u'md5': u'620e68e6a3cff80086df3348426c9ca3',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Straight Backs Are Stronger',
|
||||||
|
u'uploader': u'LUMOback',
|
||||||
|
u'description': u'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
|
||||||
|
if m_youtube is not None:
|
||||||
|
youtube_id = m_youtube.group(1)
|
||||||
|
self.to_screen(u'%s: detected Youtube video.' % video_id)
|
||||||
|
return self.url_result(youtube_id, 'Youtube')
|
||||||
|
|
||||||
|
self.report_extraction(video_id)
|
||||||
|
info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info')
|
||||||
|
info = json.loads(info)
|
||||||
|
video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath')
|
||||||
|
|
||||||
|
return {'id': info['videoId'],
|
||||||
|
'title': info['title'],
|
||||||
|
'ext': determine_ext(video_url),
|
||||||
|
'url': video_url,
|
||||||
|
'uploader': info['username'],
|
||||||
|
'thumbnail': info.get('highResImage') or info.get('medResImage'),
|
||||||
|
'description': info['description'],
|
||||||
|
'view_count': info['views'],
|
||||||
|
}
|
||||||
@@ -27,12 +27,6 @@ class VineIE(InfoExtractor):
|
|||||||
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
|
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
|
||||||
webpage, u'video URL')
|
webpage, u'video URL')
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
|
|
||||||
webpage, u'title')
|
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
|
|
||||||
webpage, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
|
uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
|
||||||
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
@@ -40,7 +34,7 @@ class VineIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': video_title,
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class WatIE(InfoExtractor):
|
class WatIE(InfoExtractor):
|
||||||
|
_WORKING = False
|
||||||
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
|
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
|
||||||
IE_NAME = 'wat.tv'
|
IE_NAME = 'wat.tv'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
|
|||||||
48
youtube_dl/extractor/weibo.py
Normal file
48
youtube_dl/extractor/weibo.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
class WeiboIE(InfoExtractor):
|
||||||
|
"""
|
||||||
|
The videos in Weibo come from different sites, this IE just finds the link
|
||||||
|
to the external video and returns it.
|
||||||
|
"""
|
||||||
|
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
|
||||||
|
u'file': u'98322879.flv',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'魔声耳机最新广告“All Eyes On Us”',
|
||||||
|
},
|
||||||
|
u'note': u'Sina video',
|
||||||
|
u'params': {
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# Additional example videos from different sites
|
||||||
|
# Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
|
||||||
|
# 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
|
||||||
|
info_page = self._download_webpage(info_url, video_id)
|
||||||
|
info = json.loads(info_page)
|
||||||
|
|
||||||
|
videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
|
||||||
|
#Prefer sina video since they have thumbnails
|
||||||
|
videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
|
||||||
|
player_url = videos_urls[-1]
|
||||||
|
m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
|
||||||
|
if m_sina is not None:
|
||||||
|
self.to_screen('Sina video detected')
|
||||||
|
sina_id = m_sina.group(1)
|
||||||
|
player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
|
||||||
|
return self.url_result(player_url)
|
||||||
|
|
||||||
@@ -40,8 +40,20 @@ class YouJizzIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(embed_page_url, video_id)
|
webpage = self._download_webpage(embed_page_url, video_id)
|
||||||
|
|
||||||
# Get the video URL
|
# Get the video URL
|
||||||
video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
|
m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage)
|
||||||
webpage, u'video URL')
|
if m_playlist is not None:
|
||||||
|
playlist_url = m_playlist.group('playlist')
|
||||||
|
playlist_page = self._download_webpage(playlist_url, video_id,
|
||||||
|
u'Downloading playlist page')
|
||||||
|
m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page))
|
||||||
|
if len(m_levels) == 0:
|
||||||
|
raise ExtractorError(u'Unable to extract video url')
|
||||||
|
videos = [(int(m.group(1)), m.group(2)) for m in m_levels]
|
||||||
|
(_, video_url) = sorted(videos)[0]
|
||||||
|
video_url = video_url.replace('%252F', '%2F')
|
||||||
|
else:
|
||||||
|
video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
|
||||||
|
webpage, u'video URL')
|
||||||
|
|
||||||
info = {'id': video_id,
|
info = {'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class YoukuIE(InfoExtractor):
|
class YoukuIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
|
_VALID_URL = r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P<ID>[A-Za-z0-9]+)(\.html|/v.swf)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
|
u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
|
||||||
u"file": u"XNDgyMDQ2NTQw_part00.flv",
|
u"file": u"XNDgyMDQ2NTQw_part00.flv",
|
||||||
|
|||||||
@@ -117,7 +117,19 @@ class YoutubeIE(InfoExtractor):
|
|||||||
u"uploader": u"IconaPop",
|
u"uploader": u"IconaPop",
|
||||||
u"uploader_id": u"IconaPop"
|
u"uploader_id": u"IconaPop"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
|
||||||
|
u"file": u"07FYdnEawAQ.mp4",
|
||||||
|
u"note": u"Test VEVO video with age protection (#956)",
|
||||||
|
u"info_dict": {
|
||||||
|
u"upload_date": u"20130703",
|
||||||
|
u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
|
||||||
|
u"description": u"md5:64249768eec3bc4276236606ea996373",
|
||||||
|
u"uploader": u"justintimberlakeVEVO",
|
||||||
|
u"uploader_id": u"justintimberlakeVEVO"
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -131,10 +143,6 @@ class YoutubeIE(InfoExtractor):
|
|||||||
"""Report attempt to set language."""
|
"""Report attempt to set language."""
|
||||||
self.to_screen(u'Setting language')
|
self.to_screen(u'Setting language')
|
||||||
|
|
||||||
def report_login(self):
|
|
||||||
"""Report attempt to log in."""
|
|
||||||
self.to_screen(u'Logging in')
|
|
||||||
|
|
||||||
def report_video_webpage_download(self, video_id):
|
def report_video_webpage_download(self, video_id):
|
||||||
"""Report attempt to download video webpage."""
|
"""Report attempt to download video webpage."""
|
||||||
self.to_screen(u'%s: Downloading video webpage' % video_id)
|
self.to_screen(u'%s: Downloading video webpage' % video_id)
|
||||||
@@ -171,20 +179,26 @@ class YoutubeIE(InfoExtractor):
|
|||||||
def _decrypt_signature(self, s):
|
def _decrypt_signature(self, s):
|
||||||
"""Turn the encrypted s field into a working signature"""
|
"""Turn the encrypted s field into a working signature"""
|
||||||
|
|
||||||
if len(s) == 88:
|
if len(s) == 92:
|
||||||
|
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
|
||||||
|
elif len(s) == 90:
|
||||||
|
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
|
||||||
|
elif len(s) == 88:
|
||||||
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
|
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
|
||||||
elif len(s) == 87:
|
elif len(s) == 87:
|
||||||
return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
|
return s[4:23] + s[86] + s[24:85]
|
||||||
elif len(s) == 86:
|
elif len(s) == 86:
|
||||||
return s[2:63] + s[82] + s[64:82] + s[63]
|
return s[2:63] + s[82] + s[64:82] + s[63]
|
||||||
elif len(s) == 85:
|
elif len(s) == 85:
|
||||||
return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
|
return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
|
||||||
elif len(s) == 84:
|
elif len(s) == 84:
|
||||||
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
|
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
|
||||||
elif len(s) == 83:
|
elif len(s) == 83:
|
||||||
return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36]
|
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:]
|
||||||
elif len(s) == 82:
|
elif len(s) == 82:
|
||||||
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
|
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
|
||||||
|
elif len(s) == 81:
|
||||||
|
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
|
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
|
||||||
@@ -296,26 +310,6 @@ class YoutubeIE(InfoExtractor):
|
|||||||
if self._downloader is None:
|
if self._downloader is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
username = None
|
|
||||||
password = None
|
|
||||||
downloader_params = self._downloader.params
|
|
||||||
|
|
||||||
# Attempt to use provided username and password or .netrc data
|
|
||||||
if downloader_params.get('username', None) is not None:
|
|
||||||
username = downloader_params['username']
|
|
||||||
password = downloader_params['password']
|
|
||||||
elif downloader_params.get('usenetrc', False):
|
|
||||||
try:
|
|
||||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
|
||||||
if info is not None:
|
|
||||||
username = info[0]
|
|
||||||
password = info[2]
|
|
||||||
else:
|
|
||||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
|
||||||
except (IOError, netrc.NetrcParseError) as err:
|
|
||||||
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
|
|
||||||
return
|
|
||||||
|
|
||||||
# Set language
|
# Set language
|
||||||
request = compat_urllib_request.Request(self._LANG_URL)
|
request = compat_urllib_request.Request(self._LANG_URL)
|
||||||
try:
|
try:
|
||||||
@@ -325,6 +319,8 @@ class YoutubeIE(InfoExtractor):
|
|||||||
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
|
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
|
||||||
# No authentication to be performed
|
# No authentication to be performed
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
@@ -432,15 +428,35 @@ class YoutubeIE(InfoExtractor):
|
|||||||
|
|
||||||
# Get video info
|
# Get video info
|
||||||
self.report_video_info_webpage_download(video_id)
|
self.report_video_info_webpage_download(video_id)
|
||||||
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||||
video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
self.report_age_confirmation()
|
||||||
% (video_id, el_type))
|
age_gate = True
|
||||||
|
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||||
|
# this can be viewed without login into Youtube
|
||||||
|
data = compat_urllib_parse.urlencode({'video_id': video_id,
|
||||||
|
'el': 'embedded',
|
||||||
|
'gl': 'US',
|
||||||
|
'hl': 'en',
|
||||||
|
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||||
|
'asv': 3,
|
||||||
|
'sts':'1588',
|
||||||
|
})
|
||||||
|
video_info_url = 'https://www.youtube.com/get_video_info?' + data
|
||||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||||
note=False,
|
note=False,
|
||||||
errnote='unable to download video info webpage')
|
errnote='unable to download video info webpage')
|
||||||
video_info = compat_parse_qs(video_info_webpage)
|
video_info = compat_parse_qs(video_info_webpage)
|
||||||
if 'token' in video_info:
|
else:
|
||||||
break
|
age_gate = False
|
||||||
|
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
||||||
|
video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
||||||
|
% (video_id, el_type))
|
||||||
|
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||||
|
note=False,
|
||||||
|
errnote='unable to download video info webpage')
|
||||||
|
video_info = compat_parse_qs(video_info_webpage)
|
||||||
|
if 'token' in video_info:
|
||||||
|
break
|
||||||
if 'token' not in video_info:
|
if 'token' not in video_info:
|
||||||
if 'reason' in video_info:
|
if 'reason' in video_info:
|
||||||
raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
|
raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
|
||||||
@@ -557,6 +573,8 @@ class YoutubeIE(InfoExtractor):
|
|||||||
self.report_rtmp_download()
|
self.report_rtmp_download()
|
||||||
video_url_list = [(None, video_info['conn'][0])]
|
video_url_list = [(None, video_info['conn'][0])]
|
||||||
elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
|
elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
|
||||||
|
if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
|
||||||
|
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
|
||||||
url_map = {}
|
url_map = {}
|
||||||
for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
|
for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
|
||||||
url_data = compat_parse_qs(url_data_str)
|
url_data = compat_parse_qs(url_data_str)
|
||||||
@@ -567,10 +585,17 @@ class YoutubeIE(InfoExtractor):
|
|||||||
elif 's' in url_data:
|
elif 's' in url_data:
|
||||||
if self._downloader.params.get('verbose'):
|
if self._downloader.params.get('verbose'):
|
||||||
s = url_data['s'][0]
|
s = url_data['s'][0]
|
||||||
player = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
|
if age_gate:
|
||||||
'html5 player', fatal=False)
|
player_version = self._search_regex(r'ad3-(.+?)\.swf',
|
||||||
self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' %
|
video_info['ad3_module'][0], 'flash player',
|
||||||
(len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
|
fatal=False)
|
||||||
|
player = 'flash player %s' % player_version
|
||||||
|
else:
|
||||||
|
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
|
||||||
|
'html5 player', fatal=False)
|
||||||
|
parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
|
||||||
|
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
|
||||||
|
(len(s), parts_sizes, url_data['itag'][0], player))
|
||||||
signature = self._decrypt_signature(url_data['s'][0])
|
signature = self._decrypt_signature(url_data['s'][0])
|
||||||
url += '&signature=' + signature
|
url += '&signature=' + signature
|
||||||
if 'ratebypass' not in url:
|
if 'ratebypass' not in url:
|
||||||
@@ -697,7 +722,7 @@ class YoutubePlaylistIE(InfoExtractor):
|
|||||||
|
|
||||||
videos = [v[1] for v in sorted(videos)]
|
videos = [v[1] for v in sorted(videos)]
|
||||||
|
|
||||||
url_results = [self.url_result(url, 'Youtube') for url in videos]
|
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
|
||||||
return [self.playlist_result(url_results, playlist_id, playlist_title)]
|
return [self.playlist_result(url_results, playlist_id, playlist_title)]
|
||||||
|
|
||||||
|
|
||||||
@@ -706,7 +731,7 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
|
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
|
||||||
_TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
|
_TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
|
||||||
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
||||||
_MORE_PAGES_URL = 'http://www.youtube.com/channel_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
_MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
||||||
IE_NAME = u'youtube:channel'
|
IE_NAME = u'youtube:channel'
|
||||||
|
|
||||||
def extract_videos_from_page(self, page):
|
def extract_videos_from_page(self, page):
|
||||||
@@ -755,7 +780,7 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||||
|
|
||||||
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
|
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
|
||||||
url_entries = [self.url_result(url, 'Youtube') for url in urls]
|
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
|
||||||
return [self.playlist_result(url_entries, channel_id)]
|
return [self.playlist_result(url_entries, channel_id)]
|
||||||
|
|
||||||
|
|
||||||
@@ -812,7 +837,7 @@ class YoutubeUserIE(InfoExtractor):
|
|||||||
pagenum += 1
|
pagenum += 1
|
||||||
|
|
||||||
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
|
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
|
||||||
url_results = [self.url_result(url, 'Youtube') for url in urls]
|
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
|
||||||
return [self.playlist_result(url_results, playlist_title = username)]
|
return [self.playlist_result(url_results, playlist_title = username)]
|
||||||
|
|
||||||
class YoutubeSearchIE(SearchInfoExtractor):
|
class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
@@ -873,12 +898,12 @@ class YoutubeShowIE(InfoExtractor):
|
|||||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSubscriptionsIE(YoutubeIE):
|
class YoutubeFeedsInfoExtractor(YoutubeIE):
|
||||||
"""It's a subclass of YoutubeIE because we need to login"""
|
"""
|
||||||
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
|
Base class for extractors that fetch info from
|
||||||
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
|
http://www.youtube.com/feed_ajax
|
||||||
IE_NAME = u'youtube:subscriptions'
|
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
||||||
_FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
|
"""
|
||||||
_PAGING_STEP = 30
|
_PAGING_STEP = 30
|
||||||
|
|
||||||
# Overwrite YoutubeIE properties we don't want
|
# Overwrite YoutubeIE properties we don't want
|
||||||
@@ -887,12 +912,27 @@ class YoutubeSubscriptionsIE(YoutubeIE):
|
|||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return re.match(cls._VALID_URL, url) is not None
|
return re.match(cls._VALID_URL, url) is not None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _FEED_TEMPLATE(self):
|
||||||
|
return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME
|
||||||
|
|
||||||
|
@property
|
||||||
|
def IE_NAME(self):
|
||||||
|
return u'youtube:%s' % self._FEED_NAME
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
|
||||||
|
super(YoutubeFeedsInfoExtractor, self)._real_initialize()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
feed_entries = []
|
feed_entries = []
|
||||||
# The step argument is available only in 2.7 or higher
|
# The step argument is available only in 2.7 or higher
|
||||||
for i in itertools.count(0):
|
for i in itertools.count(0):
|
||||||
paging = i*self._PAGING_STEP
|
paging = i*self._PAGING_STEP
|
||||||
info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
|
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
||||||
|
u'%s feed' % self._FEED_NAME,
|
||||||
u'Downloading page %s' % i)
|
u'Downloading page %s' % i)
|
||||||
info = json.loads(info)
|
info = json.loads(info)
|
||||||
feed_html = info['feed_html']
|
feed_html = info['feed_html']
|
||||||
@@ -901,4 +941,16 @@ class YoutubeSubscriptionsIE(YoutubeIE):
|
|||||||
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
||||||
if info['paging'] is None:
|
if info['paging'] is None:
|
||||||
break
|
break
|
||||||
return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')
|
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||||
|
|
||||||
|
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
||||||
|
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
|
||||||
|
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
|
||||||
|
_FEED_NAME = 'subscriptions'
|
||||||
|
_PLAYLIST_TITLE = u'Youtube Subscriptions'
|
||||||
|
|
||||||
|
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
|
||||||
|
IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
|
||||||
|
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
|
||||||
|
_FEED_NAME = 'recommended'
|
||||||
|
_PLAYLIST_TITLE = u'Youtube Recommended videos'
|
||||||
|
|||||||
@@ -35,6 +35,11 @@ try:
|
|||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urlparse import urlparse as compat_urllib_parse_urlparse
|
from urlparse import urlparse as compat_urllib_parse_urlparse
|
||||||
|
|
||||||
|
try:
|
||||||
|
import urllib.parse as compat_urlparse
|
||||||
|
except ImportError: # Python 2
|
||||||
|
import urlparse as compat_urlparse
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import http.cookiejar as compat_cookiejar
|
import http.cookiejar as compat_cookiejar
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
@@ -198,6 +203,20 @@ else:
|
|||||||
with open(fn, 'w', encoding='utf-8') as f:
|
with open(fn, 'w', encoding='utf-8') as f:
|
||||||
json.dump(obj, f)
|
json.dump(obj, f)
|
||||||
|
|
||||||
|
if sys.version_info >= (2,7):
|
||||||
|
def find_xpath_attr(node, xpath, key, val):
|
||||||
|
""" Find the xpath xpath[@key=val] """
|
||||||
|
assert re.match(r'^[a-zA-Z]+$', key)
|
||||||
|
assert re.match(r'^[a-zA-Z@]*$', val)
|
||||||
|
expr = xpath + u"[@%s='%s']" % (key, val)
|
||||||
|
return node.find(expr)
|
||||||
|
else:
|
||||||
|
def find_xpath_attr(node, xpath, key, val):
|
||||||
|
for f in node.findall(xpath):
|
||||||
|
if f.attrib.get(key) == val:
|
||||||
|
return f
|
||||||
|
return None
|
||||||
|
|
||||||
def htmlentity_transform(matchobj):
|
def htmlentity_transform(matchobj):
|
||||||
"""Transforms an HTML entity to a character.
|
"""Transforms an HTML entity to a character.
|
||||||
|
|
||||||
@@ -631,12 +650,12 @@ def unified_strdate(date_str):
|
|||||||
pass
|
pass
|
||||||
return upload_date
|
return upload_date
|
||||||
|
|
||||||
def determine_ext(url):
|
def determine_ext(url, default_ext=u'unknown_video'):
|
||||||
guess = url.partition(u'?')[0].rpartition(u'.')[2]
|
guess = url.partition(u'?')[0].rpartition(u'.')[2]
|
||||||
if re.match(r'^[A-Za-z0-9]+$', guess):
|
if re.match(r'^[A-Za-z0-9]+$', guess):
|
||||||
return guess
|
return guess
|
||||||
else:
|
else:
|
||||||
return u'unknown_video'
|
return default_ext
|
||||||
|
|
||||||
def date_from_str(date_str):
|
def date_from_str(date_str):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.07.08.1'
|
__version__ = '2013.07.24'
|
||||||
|
|||||||
Reference in New Issue
Block a user