1
0
mirror of https://source.netsyms.com/Mirrors/youtube-dl synced 2026-03-29 01:43:53 +00:00

Compare commits

...

111 Commits

Author SHA1 Message Date
Philipp Hagemeister
88db5ef279 2012.11.29 2012-11-27 18:36:43 +01:00
Philipp Hagemeister
f8d8b39bba Prepare 2012.11.29 release 2012-11-27 18:30:34 +01:00
Philipp Hagemeister
dcd60025f8 Fix filename sanitation (Closes #555) 2012-11-27 18:27:46 +01:00
Philipp Hagemeister
26396311b5 Add Christian Albrecht (Arte.tv IE) to authors 2012-11-27 17:16:49 +01:00
Philipp Hagemeister
dffe658bac Remove exclamation mark in --restrict-filenames mode 2012-11-27 17:15:33 +01:00
Philipp Hagemeister
33d94a6c99 Merge remote-tracking branch 'alab1001101/master' 2012-11-27 17:14:29 +01:00
Philipp Hagemeister
4d47921c9e ignore kate swap files 2012-11-27 17:01:12 +01:00
Philipp Hagemeister
d94adc2638 Actually fix manpage (#473) 2012-11-27 16:58:50 +01:00
Philipp Hagemeister
5c5d06d31d Merge pull request #473 from grimreaper/master
fix mdoc nits
2012-11-27 07:52:58 -08:00
Philipp Hagemeister
cc872b68a8 Actually merge #379 2012-11-27 16:42:50 +01:00
Philipp Hagemeister
17cb14a336 Merge remote-tracking branch 'joelverhagen/master' 2012-11-27 16:41:16 +01:00
Philipp Hagemeister
877f4c45d3 Fix output format doc 2012-11-27 16:28:29 +01:00
Philipp Hagemeister
02531431f2 Extended documentation for output format in README (Closes #268) 2012-11-27 16:27:35 +01:00
Philipp Hagemeister
e02066e7ff Windows build for 2012.11.28 2012-11-27 16:15:15 +01:00
Philipp Hagemeister
c9128b353d Bump version number to a numeric-only one to appease py2exe 2012-11-27 16:12:08 +01:00
Philipp Hagemeister
e7c6f1a2dc Bump version number 2012-11-27 16:08:39 +01:00
Philipp Hagemeister
1a911e60a4 Add test for asian characters (#551) 2012-11-27 16:07:52 +01:00
Philipp Hagemeister
46cbda0be4 Minor filename encoding improvement in a common case 2012-11-27 15:07:10 +01:00
Philipp Hagemeister
fa59f4b6a9 Merge remote-tracking branch 'chrisjrn/master' 2012-11-27 14:55:18 +01:00
Christopher Neugebauer
4a702f3819 Fixes the InfoExtractor for the Colbert Report. 2012-11-27 23:54:43 +11:00
Philipp Hagemeister
6bac102a4d Fix spacing in comedycentral IE 2012-11-27 13:24:10 +01:00
Philipp Hagemeister
958a22b7cf Merge remote-tracking branch 'chrisjrn/master' 2012-11-27 13:19:18 +01:00
Philipp Hagemeister
97cd3afc75 warn if %(stitle)s is being used 2012-11-27 13:11:06 +01:00
Philipp Hagemeister
aa2a94ed81 Encode the entire filename 2012-11-27 13:01:32 +01:00
Philipp Hagemeister
c7032546f1 Clean up test 2012-11-27 12:46:27 +01:00
Philipp Hagemeister
56781d3d2e Switch back to underline for invalid characters, and make restricted ASCII-only 2012-11-27 12:46:09 +01:00
Christopher Neugebauer
feb22fe5fe Fixed indentation error 2012-11-27 22:32:24 +11:00
Christopher Neugebauer
d8dddb7c02 Removes extranous debugging info :) 2012-11-27 22:30:07 +11:00
Christopher Neugebauer
4408d996fb Adds format listing/selection support to the Comedy Central extractor. 2012-11-27 22:28:16 +11:00
Philipp Hagemeister
ed7516c69d Merge remote-tracking branch 'chrisjrn/master' 2012-11-27 12:25:51 +01:00
Christopher Neugebauer
89af8e9d32 Removes extraneous debug message. 2012-11-27 21:51:30 +11:00
Christopher Neugebauer
36a9c0b5ff Points the ComedyCentral extractor at a CDN which works with more RTMPDump versions. 2012-11-27 21:49:27 +11:00
Philipp Hagemeister
9fb3bfb45a Merge remote-tracking branch 'gcmalloc/master' 2012-11-27 00:42:47 +01:00
Filippo Valsorda
d479e34043 release 2012.11.27 2012-11-27 00:22:39 +01:00
Philipp Hagemeister
240089e5df remove accidental remnants 2012-11-27 00:14:12 +01:00
Philipp Hagemeister
1c469a9480 New optoin --restrict-filenames 2012-11-26 23:58:46 +01:00
Philipp Hagemeister
71f36332dd Remove redundancy in instructions 2012-11-26 23:40:51 +01:00
Philipp Hagemeister
8179d2ba74 Merge branch 'master' of github.com:rg3/youtube-dl 2012-11-26 23:25:04 +01:00
Philipp Hagemeister
df4bad3245 Document configuration 2012-11-26 23:24:55 +01:00
Filippo Valsorda
a7b5c8d6a8 fix FAQ on how to compile (also, starnge fix in the Makefile) 2012-11-26 22:35:12 +01:00
Philipp Hagemeister
92b91c1878 Use character instead of byte strings 2012-11-26 04:23:20 +01:00
Philipp Hagemeister
7ec1a206ea Remove longs (int does the right thing since Python 2.2, see PEP 237) 2012-11-26 04:13:43 +01:00
Philipp Hagemeister
51937c0869 Add some parentheses around print for #180 2012-11-26 04:05:54 +01:00
Philipp Hagemeister
6b50761222 Merge pull request #538 from zejn/patch-1
Also enable album URLs on Vimeo.
2012-11-25 18:04:11 -08:00
Philipp Hagemeister
6571408dc6 Merge pull request #545 from FiloSottile/alias
Kill (alias) --literal and %(title)
2012-11-25 15:57:57 -08:00
Filippo Valsorda
b6fab35b9f alias %(title)s to %(stitle)s 2012-11-25 20:39:42 +01:00
Filippo Valsorda
baec15387c aliased --literal to --title 2012-11-25 20:28:49 +01:00
zejn
297d7fd9c0 Also enable album URLs on Vimeo. 2012-11-21 13:24:14 +01:00
Filippo Valsorda
5002aea371 release 2012.11.17 2012-11-17 14:02:31 +01:00
Filippo Valsorda
74033a662d Reworked Vimeo file selection logic (quality, codec) - closes #530 2012-11-13 21:53:18 +01:00
Filippo Valsorda
0526e4f55a Merge pull request #522 from art-zhitnik/master
--(match|reject)-title utf8 fix
2012-11-11 06:22:10 -08:00
Art Zhitnik
39973a0236 Solve the bug of parsing titles with unicode (cyrillic) 2012-11-11 14:09:12 +10:00
Filippo Valsorda
5d40a470a2 quiet the HTMLParser debug info - closes #517 2012-11-09 12:32:07 +01:00
Filippo Valsorda
4cc391461a fix DailyMotion official users videos - closes #281 - by @yvestan 2012-11-07 14:44:10 +01:00
Filippo Valsorda
bf95333e5e fixed MetacafeIE (uploader nickname regex) - closes #515 2012-11-06 23:08:10 +01:00
Philipp Hagemeister
b7a34316d2 -x for --extract-audio, one of the most popular options 2012-10-30 17:41:38 +01:00
Philipp Hagemeister
74e453bdea New --id option for the old default filename pattern 2012-10-30 17:37:53 +01:00
Philipp Hagemeister
156a59e7a9 Additional tests in file name sanitation 2012-10-29 08:19:54 +01:00
Philipp Hagemeister
aeca861f22 Merge pull request #502 from FiloSottile/new_sanitize_filename
My sanitize_filename proposal
2012-10-28 15:33:59 -07:00
Filippo Valsorda
42cb53fcfa modified filename escaping to a "smarter" one 2012-10-28 22:47:02 +01:00
Filippo Valsorda
fe4d68e196 slight change to Dailymotion uploader regex (fix) 2012-10-28 21:43:43 +01:00
Philipp Hagemeister
25b7fd9c01 Merge pull request #491 from tyll/master
Update install target
2012-10-26 01:10:25 -07:00
Till Maas
e79e8b7dc4 Update install target
- Allow to configure destination directories to fulfill the needs of
  different distributions
- Support DESTDIR variable for staging installation when packaging
- Do not set user/group to root. It requires 'make install' to run as
  root, but then this is the default behaviour anyways.
2012-10-25 21:19:13 +02:00
Filippo Valsorda
965a8b2bc4 Merge pull request #488 from Tailszefox/local
Fix audio bitrate quality for ffmpeg/avconv (closes #487)
2012-10-24 11:42:31 -07:00
gcmalloc
a8ac2f8664 adding second vimeo url 2012-10-24 15:57:19 +02:00
gcmalloc
fb0e99b884 skipping vimeo for the moment 2012-10-24 00:32:23 +02:00
gcmalloc
9c6e9a4532 adding xnxx test 2012-10-24 00:13:16 +02:00
gcmalloc
67af74992e adding collegehumor test 2012-10-24 00:05:45 +02:00
gcmalloc
103c508ffa adding stanford open class courses 2012-10-23 23:59:12 +02:00
gcmalloc
2876773381 adding test for vimeo, xvideo and soundcloud 2012-10-23 23:53:33 +02:00
Tailszefox
f06eaa873e Fix audio bitrate quality for ffmpeg/avconv 2012-10-23 16:37:12 +02:00
Philipp Hagemeister
ece34e8951 Merge pull request #486 from Tailszefox/local
Added duration for YouTube videos
2012-10-23 05:53:28 -07:00
Tailszefox
2262a32dd7 Added duration for YouTube videos 2012-10-22 18:32:42 +02:00
Philipp Hagemeister
c6c0e23a32 Support raw playlist parameters (Closes #482) 2012-10-22 13:01:36 +02:00
Philipp Hagemeister
02b324a23d Restore 2.5 compat by activating with_statement future 2012-10-22 12:51:20 +02:00
Filippo Valsorda
b8005afc20 handle YT urls with #/ redirects (closes #484) 2012-10-22 09:15:27 +02:00
Philipp Hagemeister
073522bc6c Don't use 2.7+ check_output 2012-10-19 23:28:37 +02:00
Philipp Hagemeister
9248cb0549 Merge pull request #472 from gcmalloc/master
Test proposal
2012-10-19 05:48:12 -07:00
gcmalloc
6b41b61119 correcting travis 2012-10-19 12:53:20 +02:00
gcmalloc
591bbe9c90 changing test from md5 to filesize, the file changed between download 2012-10-19 12:53:20 +02:00
gcmalloc
fc7376016c cleaning the test that doesn't work with the api for the moment 2012-10-19 12:53:20 +02:00
gcmalloc
97a37c2319 some assertion on the file downloaded 2012-10-19 12:53:20 +02:00
gcmalloc
3afed78a6a removing testing video 2012-10-19 12:53:20 +02:00
gcmalloc
4279a0ca98 correcting test to be compatible with python2.6 2012-10-19 12:53:20 +02:00
gcmalloc
edcc7d2dd3 StringIO used by nosetests do not merge with the way youtube-dl handle sys.stdout and sys.stderr 2012-10-19 12:53:19 +02:00
gcmalloc
7f60b5aa40 correction on the test 2012-10-19 12:53:19 +02:00
Eitan Adler
65adb79fb6 Fix mandoc nits 2012-10-15 21:45:56 -04:00
gcmalloc
aeeb29a356 adding travis support 2012-10-15 10:58:35 +02:00
Filippo Valsorda
902b2a0a45 New IE: YouTube channels (closes #396) 2012-10-14 13:48:18 +02:00
gcmalloc
6d9c22cd26 correcting the makefile according to the new one 2012-10-12 20:30:01 +02:00
gcmalloc
729baf58b2 removing extended globbing for the find utility 2012-10-12 20:25:22 +02:00
gcmalloc
4c9afeca34 adding xvideo 2012-10-12 20:25:22 +02:00
gcmalloc
6da7877bf5 adding facebook test 2012-10-12 20:25:22 +02:00
gcmalloc
b4e5de51ec adding photobucket test 2012-10-12 20:25:22 +02:00
gcmalloc
a4b5f22554 adding metacafe test 2012-10-12 20:25:22 +02:00
gcmalloc
ff08984246 adding dailymotion test 2012-10-12 20:25:22 +02:00
gcmalloc
137c5803c3 some changes to keep the same standard 2012-10-12 20:25:22 +02:00
gcmalloc
3eec021a1f removing unused global modifier 2012-10-12 20:25:22 +02:00
gcmalloc
5a33b73309 correcting the makefile 2012-10-12 20:25:22 +02:00
gcmalloc
0b4e98490b changing test video 2012-10-12 20:24:58 +02:00
gcmalloc
80a846e119 correction on the test for the utils.py 2012-10-12 20:24:58 +02:00
gcmalloc
434d60cd95 adding clean rule in the makefile 2012-10-12 20:24:58 +02:00
gcmalloc
efe8902f0b adding download test with md5 check 2012-10-12 20:24:58 +02:00
gcmalloc
44fb345437 adding TestCase class and corresponding test 2012-10-12 20:24:58 +02:00
gcmalloc
9993976ae4 correction on the sanitize title method, change in title resulting 2012-10-12 20:24:58 +02:00
gcmalloc
b387fb0385 adding test rule in the Makefile 2012-10-12 20:24:58 +02:00
Filippo Valsorda
10daa766a1 support EDU YouTube playlists (closes #407) 2012-10-11 08:27:19 +02:00
Christian Albrecht
2ebc6e6a92 Make youtube-dl 2012-08-26 09:57:49 +02:00
Christian Albrecht
f2ad10a97d Add arte.tv Info Extractor 2012-08-26 09:47:19 +02:00
Joel Verhagen
aab4fca422 Updated --no-resize-buffer docs, removed -b option 2012-07-16 10:59:21 -04:00
Joel Verhagen
891d7f2329 Added options to set download buffer size and disable automatic buffer resizing. 2012-07-14 16:47:19 -04:00
19 changed files with 906 additions and 211 deletions

1
.gitignore vendored
View File

@@ -3,3 +3,4 @@
*~
wine-py2exe/
py2exe.log
*.kate-swp

9
.travis.yml Normal file
View File

@@ -0,0 +1,9 @@
language: python
#specify the python version
python:
- "2.6"
- "2.7"
#command to install the setup
install:
# command to run tests
script: nosetests test --nocapture

View File

@@ -1 +1 @@
2012.10.09
2012.11.29

View File

@@ -5,12 +5,22 @@ clean:
rm -f youtube-dl youtube-dl.exe youtube-dl.1 LATEST_VERSION
PREFIX=/usr/local
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
install -m 755 --owner root --group root youtube-dl $(PREFIX)/bin/
install -m 644 --owner root --group root youtube-dl.1 $(PREFIX)/man/man1
install -m 644 --owner root --group root youtube-dl.bash-completion /etc/bash_completion.d/youtube-dl
BINDIR=$(PREFIX)/bin
MANDIR=$(PREFIX)/man
SYSCONFDIR=/etc
.PHONY: all clean install README.md youtube-dl.bash-completion
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
install -d $(DESTDIR)$(BINDIR)
install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
install -d $(DESTDIR)$(MANDIR)/man1
install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
test:
nosetests2 --nocapture test
.PHONY: all clean install test README.md youtube-dl.bash-completion
# TODO un-phony README.md and youtube-dl.bash_completion by reading from .in files and generating from them
youtube-dl: youtube_dl/*.py
@@ -26,13 +36,13 @@ youtube-dl.exe: youtube_dl/*.py
README.md: youtube_dl/*.py
@options=$$(COLUMNS=80 python -m youtube_dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/## \1/') && \
header=$$(sed -e '/.*# OPTIONS/,$$ d' README.md) && \
footer=$$(sed -e '1,/.*# FAQ/ d' README.md) && \
footer=$$(sed -e '1,/.*# CONFIGURATION/ d' README.md) && \
echo "$${header}" > README.md && \
echo >> README.md && \
echo '# OPTIONS' >> README.md && \
echo "$${options}" >> README.md&& \
echo >> README.md && \
echo '# FAQ' >> README.md && \
echo '# CONFIGURATION' >> README.md && \
echo "$${footer}" >> README.md
youtube-dl.1: README.md

View File

@@ -1,4 +1,4 @@
% youtube-dl(1)
% YOUTUBE-DL(1)
# NAME
youtube-dl
@@ -20,6 +20,11 @@ which means you can modify it, redistribute it or use it however you like.
-i, --ignore-errors continue on download errors
-r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m)
-R, --retries RETRIES number of retries (default is 10)
--buffer-size SIZE size of download buffer (e.g. 1024 or 16k) (default
is 1024)
--no-resize-buffer do not automatically adjust the buffer size. By
default, the buffer size is automatically resized
from an initial value of SIZE.
--dump-user-agent display the current browser identification
--user-agent UA specify a custom user agent
--list-extractors List all supported extractors and the URLs they
@@ -36,9 +41,10 @@ which means you can modify it, redistribute it or use it however you like.
## Filesystem Options:
-t, --title use title in file name
-l, --literal use literal title in file name
--id use video ID in file name
-l, --literal [deprecated] alias of --title
-A, --auto-number number downloaded files starting from 00000
-o, --output TEMPLATE output filename template. Use %(stitle)s to get the
-o, --output TEMPLATE output filename template. Use %(title)s to get the
title, %(uploader)s for the uploader name,
%(autonumber)s to get an automatically incremented
number, %(ext)s for the filename extension,
@@ -46,6 +52,8 @@ which means you can modify it, redistribute it or use it however you like.
%(extractor)s for the provider (youtube, metacafe,
etc), %(id)s for the video id and %% for a literal
percent. Use - to output to stdout.
--restrict-filenames Restrict filenames to only ASCII characters, and
avoid "&" and spaces in filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files
@@ -91,7 +99,7 @@ which means you can modify it, redistribute it or use it however you like.
-n, --netrc use .netrc authentication data
## Post-processing Options:
--extract-audio convert video files to audio-only files (requires
-x, --extract-audio convert video files to audio-only files (requires
ffmpeg or avconv and ffprobe or avprobe)
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav";
best by default
@@ -101,6 +109,32 @@ which means you can modify it, redistribute it or use it however you like.
-k, --keep-video keeps the video file on disk after the post-
processing; the video is erased by default
# CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`.
# OUTPUT TEMPLATE
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
- `id`: The sequence will be replaced by the video identifier.
- `url`: The sequence will be replaced by the video URL.
- `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video.
- `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format.
- `title`: The sequence will be replaced by the video title.
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
The current default template is `%(id)s.%(ext)s`, but that will be switched to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment).
In some cases, you don't want special characters such as 中, spaces, or & in the filename, for example when transferring the downloaded file to a Windows system or passing the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
youtube-dl_test_video_.mp4 # A simple file name
# FAQ
### Can you please put the -b option back?
@@ -146,7 +180,7 @@ Please note that Python 2.5 is not supported anymore.
### What is this binary file? Where has the code gone?
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repo to see the code. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make compile`.
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
### The exe throws a *Runtime error from Visual C++*

1
test/parameters.json Normal file
View File

@@ -0,0 +1 @@
{"username": null, "listformats": null, "skip_download": false, "usenetrc": false, "max_downloads": null, "noprogress": false, "forcethumbnail": false, "forceformat": false, "format_limit": null, "ratelimit": null, "nooverwrites": false, "forceurl": false, "writeinfojson": false, "simulate": false, "playliststart": 1, "continuedl": true, "password": null, "prefer_free_formats": false, "nopart": false, "retries": 10, "updatetime": true, "consoletitle": false, "verbose": true, "forcefilename": false, "ignoreerrors": false, "logtostderr": false, "format": null, "subtitleslang": null, "quiet": false, "outtmpl": "%(id)s.%(ext)s", "rejecttitle": null, "playlistend": -1, "writedescription": false, "forcetitle": false, "forcedescription": false, "writesubtitles": false, "matchtitle": null}

View File

@@ -1,29 +0,0 @@
# -*- coding: utf-8 -*-
# Various small unit tests
import os,sys
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
import youtube_dl
def test_simplify_title():
    """Exercise youtube_dl._simplify_title on plain, path-like and non-ASCII titles."""
    simplify = youtube_dl._simplify_title

    # Titles made only of letters, digits, '_' and '-' come back unchanged
    for title in (u'abc', u'abc_d-e', u'123'):
        assert simplify(title) == title

    # Forward slashes disappear while the text around them survives
    assert u'/' not in simplify(u'abc/de')
    for fragment in (u'abc', u'de'):
        assert fragment in simplify(u'abc/de')
    assert u'/' not in simplify(u'abc/de///')

    # Backslashes get the same treatment
    assert u'\\' not in simplify(u'abc\\de')
    for fragment in (u'abc', u'de'):
        assert fragment in simplify(u'abc\\de')

    # Non-ASCII letters are kept as they are
    for title in (u'ä', u'кириллица'):
        assert simplify(title) == title

    # Strip underlines
    assert simplify(u'\'a_') == u'a'

198
test/test_download.py Normal file
View File

@@ -0,0 +1,198 @@
#!/usr/bin/env python2
import unittest
import hashlib
import os
import json
from youtube_dl.FileDownloader import FileDownloader
from youtube_dl.InfoExtractors import YoutubeIE, DailymotionIE
from youtube_dl.InfoExtractors import MetacafeIE, BlipTVIE
from youtube_dl.InfoExtractors import XVideosIE, VimeoIE
from youtube_dl.InfoExtractors import SoundcloudIE, StanfordOpenClassroomIE
from youtube_dl.InfoExtractors import CollegeHumorIE, XNXXIE
class DownloadTest(unittest.TestCase):
    """Download one sample video per supported site and verify the result.

    Each test builds a FileDownloader from the options stored in
    PARAMETERS_FILE, registers the info extractor(s) needed for the site,
    downloads the sample URL and then checks the expected output file
    either by its size or by its MD5 digest.  tearDown removes whatever
    file a test left behind.
    """

    PARAMETERS_FILE = "test/parameters.json"
    #calculated with md5sum:
    #md5sum (GNU coreutils) 8.19

    YOUTUBE_SIZE = 1993883
    YOUTUBE_URL = "http://www.youtube.com/watch?v=BaW_jenozKc"
    YOUTUBE_FILE = "BaW_jenozKc.mp4"

    DAILYMOTION_MD5 = "d363a50e9eb4f22ce90d08d15695bb47"
    DAILYMOTION_URL = "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech"
    DAILYMOTION_FILE = "x33vw9.mp4"

    METACAFE_SIZE = 5754305
    METACAFE_URL = "http://www.metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/"
    METACAFE_FILE = "_aUehQsCQtM.flv"

    BLIP_MD5 = "93c24d2f4e0782af13b8a7606ea97ba7"
    BLIP_URL = "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352"
    BLIP_FILE = "5779306.m4v"

    XVIDEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf"
    XVIDEO_URL = "http://www.xvideos.com/video939581/funny_porns_by_s_-1"
    XVIDEO_FILE = "939581.flv"

    VIMEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf"
    VIMEO_URL = "http://vimeo.com/14160053"
    VIMEO_FILE = ""

    VIMEO2_MD5 = ""
    VIMEO2_URL = "http://player.vimeo.com/video/47019590"
    VIMEO2_FILE = ""

    SOUNDCLOUD_MD5 = "ce3775768ebb6432fa8495d446a078ed"
    SOUNDCLOUD_URL = "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy"
    SOUNDCLOUD_FILE = "n6FLbx6ZzMiu.mp3"

    STANDFORD_MD5 = "22c8206291368c4e2c9c1a307f0ea0f4"
    STANDFORD_URL = "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100"
    STANDFORD_FILE = "PracticalUnix_intro-environment.mp4"

    COLLEGEHUMOR_MD5 = ""
    COLLEGEHUMOR_URL = "http://www.collegehumor.com/video/6830834/mitt-romney-style-gangnam-style-parody"
    COLLEGEHUMOR_FILE = ""

    XNXX_MD5 = "5f0469c8d1dfd1bc38c8e6deb5e0a21d"
    XNXX_URL = "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_"
    XNXX_FILE = "1135332.flv"

    def _download(self, url, *extractor_classes):
        """Download url with a FileDownloader configured from PARAMETERS_FILE.

        The extractor classes are instantiated and registered in the order
        given, after the downloader has been created.
        """
        with open(DownloadTest.PARAMETERS_FILE) as f:
            fd = FileDownloader(json.load(f))
        for extractor_class in extractor_classes:
            fd.add_info_extractor(extractor_class())
        fd.download([url])

    def _check_size(self, filename, expected_size):
        """Assert that filename exists and is exactly expected_size bytes long."""
        self.assertTrue(os.path.exists(filename))
        self.assertEqual(os.path.getsize(filename), expected_size)

    def _check_md5(self, filename, expected_md5):
        """Assert that filename exists and has the MD5 hex digest expected_md5."""
        self.assertTrue(os.path.exists(filename))
        md5_down_file = md5_for_file(filename)
        self.assertEqual(md5_down_file, expected_md5)

    def test_youtube(self):
        #let's download a file from youtube
        self._download(DownloadTest.YOUTUBE_URL, YoutubeIE)
        self._check_size(DownloadTest.YOUTUBE_FILE, DownloadTest.YOUTUBE_SIZE)

    def test_dailymotion(self):
        self._download(DownloadTest.DAILYMOTION_URL, DailymotionIE)
        self._check_md5(DownloadTest.DAILYMOTION_FILE, DownloadTest.DAILYMOTION_MD5)

    def test_metacafe(self):
        # Both extractors are registered, Metacafe first, as this URL
        # carries a "yt-" prefixed video id
        self._download(DownloadTest.METACAFE_URL, MetacafeIE, YoutubeIE)
        self._check_size(DownloadTest.METACAFE_FILE, DownloadTest.METACAFE_SIZE)

    def test_blip(self):
        self._download(DownloadTest.BLIP_URL, BlipTVIE)
        self._check_md5(DownloadTest.BLIP_FILE, DownloadTest.BLIP_MD5)

    def test_xvideo(self):
        self._download(DownloadTest.XVIDEO_URL, XVideosIE)
        self._check_md5(DownloadTest.XVIDEO_FILE, DownloadTest.XVIDEO_MD5)

    def test_vimeo(self):
        #skipped for the moment produce an error
        # The bare return emulates a skip while staying Python 2.6
        # compatible; the intended test body is kept below for later.
        return
        self._download(DownloadTest.VIMEO_URL, VimeoIE)
        self._check_md5(DownloadTest.VIMEO_FILE, DownloadTest.VIMEO_MD5)

    def test_vimeo2(self):
        #skipped for the moment produce an error
        # The bare return emulates a skip while staying Python 2.6
        # compatible; the intended test body is kept below for later.
        return
        self._download(DownloadTest.VIMEO2_URL, VimeoIE)
        self._check_md5(DownloadTest.VIMEO2_FILE, DownloadTest.VIMEO2_MD5)

    def test_soundcloud(self):
        self._download(DownloadTest.SOUNDCLOUD_URL, SoundcloudIE)
        self._check_md5(DownloadTest.SOUNDCLOUD_FILE, DownloadTest.SOUNDCLOUD_MD5)

    def test_standford(self):
        self._download(DownloadTest.STANDFORD_URL, StanfordOpenClassroomIE)
        self._check_md5(DownloadTest.STANDFORD_FILE, DownloadTest.STANDFORD_MD5)

    def test_collegehumor(self):
        # NOTE(review): COLLEGEHUMOR_FILE is still empty, so the existence
        # check below cannot succeed until the real file name is filled in.
        self._download(DownloadTest.COLLEGEHUMOR_URL, CollegeHumorIE)
        self._check_md5(DownloadTest.COLLEGEHUMOR_FILE, DownloadTest.COLLEGEHUMOR_MD5)

    def test_xnxx(self):
        self._download(DownloadTest.XNXX_URL, XNXXIE)
        self._check_md5(DownloadTest.XNXX_FILE, DownloadTest.XNXX_MD5)

    def tearDown(self):
        """Delete every file a test may have downloaded, if it exists."""
        downloaded_files = (
            DownloadTest.YOUTUBE_FILE,
            DownloadTest.DAILYMOTION_FILE,
            DownloadTest.METACAFE_FILE,
            DownloadTest.BLIP_FILE,
            DownloadTest.XVIDEO_FILE,
            DownloadTest.VIMEO_FILE,
            DownloadTest.VIMEO2_FILE,
            DownloadTest.SOUNDCLOUD_FILE,
            DownloadTest.STANDFORD_FILE,
            DownloadTest.COLLEGEHUMOR_FILE,
            DownloadTest.XNXX_FILE,
        )
        for filename in downloaded_files:
            # Placeholder names ("") never exist, so they are skipped here
            if os.path.exists(filename):
                os.remove(filename)
def md5_for_file(filename, block_size=2**20):
    """Return the hexadecimal MD5 digest of the file at filename.

    The file is read in chunks of at most block_size bytes, so the whole
    file is never held in memory at once.
    """
    md5 = hashlib.md5()
    # Binary mode: the digest must cover the raw bytes on disk.  Text mode
    # may translate newlines on some platforms and, on Python 3, yields
    # str objects that hashlib refuses to hash.
    with open(filename, 'rb') as f:
        while True:
            data = f.read(block_size)
            if not data:
                break
            md5.update(data)
    return md5.hexdigest()

79
test/test_utils.py Normal file
View File

@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
# Various small unit tests
import unittest
#from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import timeconvert
from youtube_dl.utils import sanitize_filename
from youtube_dl.utils import unescapeHTML
from youtube_dl.utils import orderedSet
class TestUtil(unittest.TestCase):
    """Unit tests for the helpers imported from youtube_dl.utils."""

    def test_timeconvert(self):
        # Neither an empty string nor garbage converts to a timestamp
        for unparsable in ('', 'bougrg'):
            self.assertTrue(timeconvert(unparsable) is None)

    def test_sanitize_filename(self):
        # Already safe names pass through untouched
        for name in (u'abc', u'abc_d-e', u'123'):
            self.assertEqual(sanitize_filename(name), name)

        self.assertEqual(sanitize_filename(u'abc/de'), u'abc_de')
        self.assertTrue(u'/' not in sanitize_filename(u'abc/de///'))

        # Problematic characters are replaced or dropped
        replacements = (
            (u'abc/<>\\*|de', u'abc_de'),
            (u'xxx/<>\\*|', u'xxx'),
            (u'yes? no', u'yes no'),
            (u'this: that', u'this - that'),
        )
        for raw, expected in replacements:
            self.assertEqual(sanitize_filename(raw), expected)

        # Ampersands and non-ASCII letters survive in unrestricted mode
        for name in (u'AT&T', u'ä', u'кириллица'):
            self.assertEqual(sanitize_filename(name), name)

        # No forbidden character may show up in the result for any of them
        forbidden = u'"\0\\/'
        for source_char in forbidden:
            for banned_char in forbidden:
                self.assertFalse(banned_char in sanitize_filename(source_char))

    def test_sanitize_filename_restricted(self):
        # Already safe names pass through untouched
        for name in (u'abc', u'abc_d-e', u'123'):
            self.assertEqual(sanitize_filename(name, restricted=True), name)

        self.assertEqual(sanitize_filename(u'abc/de', restricted=True), u'abc_de')
        self.assertTrue(u'/' not in sanitize_filename(u'abc/de///', restricted=True))

        # Problematic characters, spaces and non-ASCII text are replaced
        replacements = (
            (u'abc/<>\\*|de', u'abc_de'),
            (u'xxx/<>\\*|', u'xxx'),
            (u'yes? no', u'yes_no'),
            (u'this: that', u'this_-_that'),
            (u'aäb中国的c', u'a_b_c'),
        )
        for raw, expected in replacements:
            self.assertEqual(sanitize_filename(raw, restricted=True), expected)

        # No empty filename
        self.assertNotEqual(sanitize_filename(u'ö', restricted=True), u'')

        # No forbidden character may show up in the result for any of them
        forbidden = u'"\0\\/&!: \'\t\n'
        for source_char in forbidden:
            for banned_char in forbidden:
                self.assertFalse(
                    banned_char in sanitize_filename(source_char, restricted=True))

        # Handle a common case more neatly
        common_cases = (
            (u'大声带 - Song', u'Song'),
            (u'总统: Speech', u'Speech'),
        )
        for raw, expected in common_cases:
            self.assertEqual(sanitize_filename(raw, restricted=True), expected)

        # .. but make sure the file name is never empty
        for raw in (u'-', u':'):
            self.assertNotEqual(sanitize_filename(raw, restricted=True), u'')

    def test_ordered_set(self):
        with_duplicates = [1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]
        self.assertEqual(orderedSet(with_duplicates), [1, 2, 3, 4, 5, 6, 7])
        self.assertEqual(orderedSet([]), [])
        self.assertEqual(orderedSet([1]), [1])
        #keep the list ordered
        self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])

    def test_unescape_html(self):
        # Text without an HTML entity is returned unchanged
        not_an_entity = u"%20;"
        self.assertEqual(unescapeHTML(not_an_entity), u"%20;")

Binary file not shown.

Binary file not shown.

View File

@@ -1,4 +1,4 @@
.TH youtube-dl 1 ""
.TH YOUTUBE-DL 1 ""
.SH NAME
.PP
youtube-dl
@@ -24,6 +24,11 @@ redistribute it or use it however you like.
-i,\ --ignore-errors\ \ \ \ \ \ continue\ on\ download\ errors
-r,\ --rate-limit\ LIMIT\ \ \ download\ rate\ limit\ (e.g.\ 50k\ or\ 44.6m)
-R,\ --retries\ RETRIES\ \ \ \ number\ of\ retries\ (default\ is\ 10)
--buffer-size\ SIZE\ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16k)\ (default
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ 1024)
--no-resize-buffer\ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer\ size.\ By
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ the\ buffer\ size\ is\ automatically\ resized
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ an\ initial\ value\ of\ SIZE.
--dump-user-agent\ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification
--user-agent\ UA\ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent
--list-extractors\ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they
@@ -48,9 +53,10 @@ redistribute it or use it however you like.
.nf
\f[C]
-t,\ --title\ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ title\ in\ file\ name
-l,\ --literal\ \ \ \ \ \ \ \ \ \ \ \ use\ literal\ title\ in\ file\ name
--id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ video\ ID\ in\ file\ name
-l,\ --literal\ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ --title
-A,\ --auto-number\ \ \ \ \ \ \ \ number\ downloaded\ files\ starting\ from\ 00000
-o,\ --output\ TEMPLATE\ \ \ \ output\ filename\ template.\ Use\ %(stitle)s\ to\ get\ the
-o,\ --output\ TEMPLATE\ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to\ get\ the
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ title,\ %(uploader)s\ for\ the\ uploader\ name,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(autonumber)s\ to\ get\ an\ automatically\ incremented
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ number,\ %(ext)s\ for\ the\ filename\ extension,
@@ -58,6 +64,8 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
--restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
@@ -119,7 +127,7 @@ redistribute it or use it however you like.
.IP
.nf
\f[C]
--extract-audio\ \ \ \ \ \ \ \ \ \ convert\ video\ files\ to\ audio-only\ files\ (requires
-x,\ --extract-audio\ \ \ \ \ \ convert\ video\ files\ to\ audio-only\ files\ (requires
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ffmpeg\ or\ avconv\ and\ ffprobe\ or\ avprobe)
--audio-format\ FORMAT\ \ \ \ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ or\ "wav";
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ best\ by\ default
@@ -130,6 +138,65 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processing;\ the\ video\ is\ erased\ by\ default
\f[]
.fi
.SH CONFIGURATION
.PP
You can configure youtube-dl by placing default arguments (such as
\f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not
copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or
\f[C]~/.local/config/youtube-dl.conf\f[].
.SH OUTPUT TEMPLATE
.PP
The \f[C]-o\f[] option allows users to indicate a template for the
output file names.
The basic usage is not to set any template arguments when downloading a
single file, like in
\f[C]youtube-dl\ -o\ funny_video.flv\ "http://some/video"\f[].
However, it may contain special sequences that will be replaced when
downloading each video.
The special sequences have the format \f[C]%(NAME)s\f[].
To clarify, that is a percent symbol followed by a name in parentheses,
followed by a lowercase S.
Allowed names are:
.IP \[bu] 2
\f[C]id\f[]: The sequence will be replaced by the video identifier.
.IP \[bu] 2
\f[C]url\f[]: The sequence will be replaced by the video URL.
.IP \[bu] 2
\f[C]uploader\f[]: The sequence will be replaced by the nickname of the
person who uploaded the video.
.IP \[bu] 2
\f[C]upload_date\f[]: The sequence will be replaced by the upload date
in YYYYMMDD format.
.IP \[bu] 2
\f[C]title\f[]: The sequence will be replaced by the video title.
.IP \[bu] 2
\f[C]ext\f[]: The sequence will be replaced by the appropriate extension
(like flv or mp4).
.IP \[bu] 2
\f[C]epoch\f[]: The sequence will be replaced by the Unix epoch when
creating the file.
.IP \[bu] 2
\f[C]autonumber\f[]: The sequence will be replaced by a five-digit
number that will be increased with each download, starting at zero.
.PP
The current default template is \f[C]%(id)s.%(ext)s\f[], but that will
be switched to \f[C]%(title)s-%(id)s.%(ext)s\f[] (which can be
requested with \f[C]-t\f[] at the moment).
.PP
In some cases, you don\[aq]t want special characters such as 中, spaces,
or &, for example when transferring the downloaded file to a Windows
system or passing the filename through an 8bit-unsafe channel.
In these cases, add the \f[C]--restrict-filenames\f[] flag to get a
shorter title:
.IP
.nf
\f[C]
$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc
youtube-dl\ test\ video\ \[aq]\[aq]_ä↭𝕐.mp4\ \ \ \ #\ All\ kinds\ of\ weird\ characters
$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc\ --restrict-filenames
youtube-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name
\f[]
.fi
.SH FAQ
.SS Can you please put the -b option back?
.PP
@@ -203,10 +270,10 @@ Please note that Python 2.5 is not supported anymore.
.PP
Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on
some systems) or clone the git repo to see the code.
some systems) or clone the git repository, as laid out above.
If you modify the code, you can run it by executing the
\f[C]__main__.py\f[] file.
To recompile the executable, run \f[C]make\ compile\f[].
To recompile the executable, run \f[C]make\ youtube-dl\f[].
.SS The exe throws a \f[I]Runtime error from Visual C++\f[]
.PP
To run the exe you need to install first the Microsoft Visual C++ 2008

View File

@@ -3,7 +3,7 @@ __youtube-dl()
local cur prev opts
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --buffer-size --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --no-resize-buffer --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )

BIN
youtube-dl.exe Executable file → Normal file

Binary file not shown.

View File

@@ -13,7 +13,7 @@ import urllib2
if os.name == 'nt':
import ctypes
from utils import *
@@ -44,37 +44,40 @@ class FileDownloader(object):
Available options:
username: Username for authentication purposes.
password: Password for authentication purposes.
usenetrc: Use netrc for authentication instead.
quiet: Do not print messages to stdout.
forceurl: Force printing final URL.
forcetitle: Force printing title.
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
outtmpl: Template for output names.
ignoreerrors: Do not stop on download errors.
ratelimit: Download speed limit, in bytes/sec.
nooverwrites: Prevent overwriting files.
retries: Number of times to retry for HTTP error 5xx
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
username: Username for authentication purposes.
password: Password for authentication purposes.
usenetrc: Use netrc for authentication instead.
quiet: Do not print messages to stdout.
forceurl: Force printing final URL.
forcetitle: Force printing title.
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names
ignoreerrors: Do not stop on download errors.
ratelimit: Download speed limit, in bytes/sec.
nooverwrites: Prevent overwriting files.
retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
"""
params = None
@@ -93,6 +96,9 @@ class FileDownloader(object):
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self.params = params
if '%(stitle)s' in self.params['outtmpl']:
self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
@staticmethod
def format_bytes(bytes):
if bytes is None:
@@ -139,23 +145,23 @@ class FileDownloader(object):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
return long(new_max)
return int(new_max)
rate = bytes / elapsed_time
if rate > new_max:
return long(new_max)
return int(new_max)
if rate < new_min:
return long(new_min)
return long(rate)
return int(new_min)
return int(rate)
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into a long integer."""
"""Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return long(round(number * multiplier))
return int(round(number * multiplier))
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
@@ -173,7 +179,6 @@ class FileDownloader(object):
if not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol]
output = message + terminator
if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding(), 'ignore')
self._screen_file.write(output)
@@ -181,7 +186,8 @@ class FileDownloader(object):
def to_stderr(self, message):
"""Print message to stderr."""
print >>sys.stderr, message.encode(preferredencoding())
assert type(message) == type(u'')
sys.stderr.write((message + u'\n').encode(preferredencoding()))
def to_cons_title(self, message):
"""Set console/terminal window title to message."""
@@ -321,8 +327,10 @@ class FileDownloader(object):
"""Generate the output filename."""
try:
template_dict = dict(info_dict)
template_dict['epoch'] = unicode(long(time.time()))
template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
template_dict['epoch'] = int(time.time())
template_dict['autonumber'] = u'%05d' % self._num_downloads
template_dict = dict((k, sanitize_filename(compat_str(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items())
filename = self.params['outtmpl'] % template_dict
return filename
except (ValueError, KeyError), err:
@@ -334,17 +342,22 @@ class FileDownloader(object):
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
if matchtitle:
matchtitle = matchtitle.decode('utf8')
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
if rejecttitle:
rejecttitle = rejecttitle.decode('utf8')
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
return None
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
info_dict['stitle'] = sanitize_filename(info_dict['title'])
# Keep for backwards compatibility
info_dict['stitle'] = info_dict['title']
reason = self._match_entry(info_dict)
if reason is not None:
@@ -357,20 +370,20 @@ class FileDownloader(object):
raise MaxDownloadsReached()
filename = self.prepare_filename(info_dict)
# Forced printings
if self.params.get('forcetitle', False):
print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forceurl', False):
print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forcedescription', False) and 'description' in info_dict:
print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forcefilename', False) and filename is not None:
print filename.encode(preferredencoding(), 'xmlcharrefreplace')
print(filename.encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forceformat', False):
print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace'))
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@@ -399,10 +412,10 @@ class FileDownloader(object):
except (OSError, IOError):
self.trouble(u'ERROR: Cannot write description file ' + descfn)
return
if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
# that way it will silently go on when used with unsupporting IE
try:
srtfn = filename.rsplit('.', 1)[0] + u'.srt'
self.report_writesubtitles(srtfn)
@@ -448,7 +461,7 @@ class FileDownloader(object):
except (ContentTooShortError, ), err:
self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
return
if success:
try:
self.post_process(filename, info_dict)
@@ -634,7 +647,7 @@ class FileDownloader(object):
data_len = long(data_len) + resume_len
data_len_str = self.format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = 1024
block_size = self.params.get('buffersize', 1024)
start = time.time()
while True:
# Download and write
@@ -660,7 +673,8 @@ class FileDownloader(object):
except (IOError, OSError), err:
self.trouble(u'\nERROR: unable to write data: %s' % str(err))
return False
block_size = self.best_block_size(after - before, len(data_block))
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
# Progress message
speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)

View File

@@ -102,6 +102,7 @@ class YoutubeIE(InfoExtractor):
(?:https?://)? # http(s):// (optional)
(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?!view_play_list|my_playlists|artist|playlist) # ignore playlist URLs
(?: # the various things that can precede the ID:
(?:(?:v|embed|e)/) # v/ or embed/ or e/
@@ -212,9 +213,9 @@ class YoutubeIE(InfoExtractor):
return srt
def _print_formats(self, formats):
print 'Available formats:'
print('Available formats:')
for x in formats:
print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
def _real_initialize(self):
if self._downloader is None:
@@ -237,7 +238,7 @@ class YoutubeIE(InfoExtractor):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
return
# Set language
@@ -246,7 +247,7 @@ class YoutubeIE(InfoExtractor):
self.report_lang()
urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
return
# No authentication to be performed
@@ -269,7 +270,7 @@ class YoutubeIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
return
# Confirm age
@@ -282,7 +283,7 @@ class YoutubeIE(InfoExtractor):
self.report_age_confirmation()
age_results = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -304,7 +305,7 @@ class YoutubeIE(InfoExtractor):
try:
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
# Attempt to extract SWF player URL
@@ -326,7 +327,7 @@ class YoutubeIE(InfoExtractor):
if 'token' in video_info:
break
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if 'token' not in video_info:
if 'reason' in video_info:
@@ -389,7 +390,7 @@ class YoutubeIE(InfoExtractor):
try:
srt_list = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
if not srt_lang_list:
@@ -406,13 +407,19 @@ class YoutubeIE(InfoExtractor):
try:
srt_xml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
if not srt_xml:
raise Trouble(u'WARNING: unable to download video subtitles')
video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
except Trouble as trouble:
self._downloader.trouble(trouble[0])
if 'length_seconds' not in video_info:
self._downloader.trouble(u'WARNING: unable to extract video duration')
video_duration = ''
else:
video_duration = urllib.unquote_plus(video_info['length_seconds'][0])
# token
video_token = urllib.unquote_plus(video_info['token'][0])
@@ -479,7 +486,8 @@ class YoutubeIE(InfoExtractor):
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles
'subtitles': video_subtitles,
'duration': video_duration
})
return results
@@ -518,7 +526,7 @@ class MetacafeIE(InfoExtractor):
self.report_disclaimer()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
return
# Confirm age
@@ -531,7 +539,7 @@ class MetacafeIE(InfoExtractor):
self.report_age_confirmation()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -555,7 +563,7 @@ class MetacafeIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader and title from webpage
@@ -595,7 +603,7 @@ class MetacafeIE(InfoExtractor):
return
video_title = mobj.group(1).decode('utf-8')
mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
mobj = re.search(r'submitter=(.*?);', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
@@ -648,7 +656,7 @@ class DailymotionIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader and title from webpage
@@ -684,9 +692,14 @@ class DailymotionIE(InfoExtractor):
video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
video_uploader = u'NA'
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
if mobj is None:
self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
# lookin for official user
mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage)
if mobj_official is None:
self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
else:
video_uploader = mobj_official.group(1)
else:
video_uploader = mobj.group(1)
@@ -741,7 +754,7 @@ class GoogleIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader, and title from webpage
@@ -780,7 +793,7 @@ class GoogleIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
if mobj is None:
@@ -836,7 +849,7 @@ class PhotobucketIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader, and title from webpage
@@ -906,7 +919,7 @@ class YahooIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@@ -930,7 +943,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract uploader and title from webpage
@@ -988,7 +1001,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract media URL from playlist XML
@@ -1017,7 +1030,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
IE_NAME = u'vimeo'
def __init__(self, downloader=None):
@@ -1046,7 +1059,7 @@ class VimeoIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Now we begin extracting as much information as we can from what we
@@ -1087,21 +1100,32 @@ class VimeoIE(InfoExtractor):
timestamp = config['request']['timestamp']
# Vimeo specific: extract video codec and quality information
# First consider quality, then codecs, then take everything
# TODO bind to format param
codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
for codec in codecs:
if codec[0] in config["video"]["files"]:
video_codec = codec[0]
video_extension = codec[1]
if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd'
else: quality = 'sd'
files = { 'hd': [], 'sd': [], 'other': []}
for codec_name, codec_extension in codecs:
if codec_name in config["video"]["files"]:
if 'hd' in config["video"]["files"][codec_name]:
files['hd'].append((codec_name, codec_extension, 'hd'))
elif 'sd' in config["video"]["files"][codec_name]:
files['sd'].append((codec_name, codec_extension, 'sd'))
else:
files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
for quality in ('hd', 'sd', 'other'):
if len(files[quality]) > 0:
video_quality = files[quality][0][2]
video_codec = files[quality][0][0]
video_extension = files[quality][0][1]
self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
break
else:
self._downloader.trouble(u'ERROR: no known codec found')
return
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
%(video_id, sig, timestamp, quality, video_codec.upper())
%(video_id, sig, timestamp, video_quality, video_codec.upper())
return [{
'id': video_id,
@@ -1116,6 +1140,143 @@ class VimeoIE(InfoExtractor):
}]
class ArteTvIE(InfoExtractor):
"""arte.tv information extractor."""
_VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
_LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv'
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
def report_download_webpage(self, video_id):
"""Report webpage download."""
self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
def report_extraction(self, video_id):
"""Report information extraction."""
self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
def fetch_webpage(self, url):
self._downloader.increment_downloads()
request = urllib2.Request(url)
try:
self.report_download_webpage(url)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
return
except ValueError, err:
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
return webpage
def grep_webpage(self, url, regex, regexFlags, matchTuples):
page = self.fetch_webpage(url)
mobj = re.search(regex, page, regexFlags)
info = {}
if mobj is None:
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
for (i, key, err) in matchTuples:
if mobj.group(i) is None:
self._downloader.trouble(err)
return
else:
info[key] = mobj.group(i)
return info
def extractLiveStream(self, url):
video_lang = url.split('/')[-4]
info = self.grep_webpage(
url,
r'src="(.*?/videothek_js.*?\.js)',
0,
[
(1, 'url', u'ERROR: Invalid URL: %s' % url)
]
)
http_host = url.split('/')[2]
next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
info = self.grep_webpage(
next_url,
r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
'(http://.*?\.swf).*?' +
'(rtmp://.*?)\'',
re.DOTALL,
[
(1, 'path', u'ERROR: could not extract video path: %s' % url),
(2, 'player', u'ERROR: could not extract video player: %s' % url),
(3, 'url', u'ERROR: could not extract video url: %s' % url)
]
)
video_url = u'%s/%s' % (info.get('url'), info.get('path'))
def extractPlus7Stream(self, url):
    """Follow the reference chain of a video page down to its HD stream.

    Three lookups are chained through grep_webpage: the page at url names
    a "videorefFileUrl" document, that document holds a <video> reference
    for the wanted language, and the referenced document holds the video
    id, name, date and the URL of the "hd" quality.

    Returns the info dict for the video, or None as soon as one lookup
    fails (grep_webpage reports the reason).
    """
    # presumably the language code; it is matched against the lang
    # attribute of the <video> tags below
    video_lang = url.split('/')[-3]
    info = self.grep_webpage(
        url,
        r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
        0,
        [
            (1, 'url', u'ERROR: Invalid URL: %s' % url)
        ]
    )
    if info is None:
        # grep_webpage yields None on failure; stop instead of
        # dereferencing it
        return None

    next_url = urllib.unquote(info.get('url'))
    info = self.grep_webpage(
        next_url,
        r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
        0,
        [
            (1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
        ]
    )
    if info is None:
        return None

    next_url = urllib.unquote(info.get('url'))
    info = self.grep_webpage(
        next_url,
        r'<video id="(.*?)".*?>.*?' +
        '<name>(.*?)</name>.*?' +
        '<dateVideo>(.*?)</dateVideo>.*?' +
        '<url quality="hd">(.*?)</url>',
        re.DOTALL,
        [
            (1, 'id', u'ERROR: could not extract video id: %s' % url),
            (2, 'title', u'ERROR: could not extract video title: %s' % url),
            (3, 'date', u'ERROR: could not extract video date: %s' % url),
            (4, 'url', u'ERROR: could not extract video url: %s' % url)
        ]
    )
    if info is None:
        return None

    return {
        'id': info.get('id'),
        'url': urllib.unquote(info.get('url')),
        'uploader': u'arte.tv',
        'upload_date': info.get('date'),
        'title': info.get('title'),
        'ext': u'mp4',
        'format': u'NA',
        'player_url': None,
    }
def _real_extract(self, url):
    """Extract the video information for url.

    The last path segment of url serves as the video id for reporting. If
    it matches _LIVE_URL the live-stream path is taken, which produces no
    result; otherwise the regular video path is used.

    Returns a one-element list with the info dict, or None when nothing
    could be extracted.
    """
    video_id = url.split('/')[-1]
    self.report_extraction(video_id)

    if re.search(self._LIVE_URL, video_id) is not None:
        self.extractLiveStream(url)
        return None

    info = self.extractPlus7Stream(url)
    if info is None:
        # never hand [None] to the caller as if it were a result
        return None
    return [info]
class GenericIE(InfoExtractor):
"""Generic last-resort information extractor."""
@@ -1201,7 +1362,7 @@ class GenericIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
except ValueError, err:
# since this is the last-resort InfoExtractor, if
@@ -1322,7 +1483,7 @@ class YoutubeSearchIE(InfoExtractor):
try:
data = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err))
return
api_response = json.loads(data)['data']
@@ -1399,7 +1560,7 @@ class GoogleSearchIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1482,7 +1643,7 @@ class YahooSearchIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1508,9 +1669,9 @@ class YahooSearchIE(InfoExtractor):
class YoutubePlaylistIE(InfoExtractor):
"""Information Extractor for YouTube playlists."""
_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
_VALID_URL = r'(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL|EC)?|PL|EC)([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
_VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&amp;list=.*?%s'
_VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&amp;([^&"]+&amp;)*list=.*?%s'
_MORE_PAGES_INDICATOR = r'yt-uix-pager-next'
IE_NAME = u'youtube:playlist'
@@ -1552,7 +1713,7 @@ class YoutubePlaylistIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1578,6 +1739,56 @@ class YoutubePlaylistIE(InfoExtractor):
return
class YoutubeChannelIE(InfoExtractor):
    """Information Extractor for YouTube channels."""

    # Group 1 captures the channel id of a .../channel/<id>[/...] URL.
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)(?:/.*)?$"
    # Filled in with (channel id, page number).
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # A page containing this marker is followed by another page.
    _MORE_PAGES_INDICATOR = r'yt-uix-button-content">Next' # TODO
    IE_NAME = u'youtube:channel'

    def report_download_page(self, channel_id, pagenum):
        """Report attempt to download channel page with given number."""
        self._downloader.to_screen(u'[youtube] Channel %s: Downloading page #%s' % (channel_id, pagenum))

    def _real_extract(self, url):
        """Download every video listed on the channel that url points to.

        The channel's video list is read page by page until a page no
        longer contains _MORE_PAGES_INDICATOR. Each collected video is then
        passed to the downloader as a regular watch URL. Errors (a URL
        that does not match _VALID_URL, a page that cannot be downloaded)
        are reported through the downloader's trouble(). Returns None.
        """
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid url: %s' % url)
            return

        # Download channel pages
        channel_id = mobj.group(1)
        video_ids = []
        pagenum = 1

        while True:
            self.report_download_page(channel_id, pagenum)
            page_url = self._TEMPLATE_URL % (channel_id, pagenum)
            request = urllib2.Request(page_url)
            try:
                page = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
                self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                return

            # Extract video identifiers; an id is kept once per page, in
            # order of first appearance
            seen_on_page = set()
            for id_match in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&', page):
                video_id = id_match.group(1)
                if video_id not in seen_on_page:
                    seen_on_page.add(video_id)
                    video_ids.append(video_id)

            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                break
            pagenum = pagenum + 1

        for video_id in video_ids:
            self._downloader.download(['http://www.youtube.com/watch?v=%s' % video_id])
        return
class YoutubeUserIE(InfoExtractor):
"""Information Extractor for YouTube users."""
@@ -1622,7 +1833,7 @@ class YoutubeUserIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1694,7 +1905,7 @@ class BlipTVUserIE(InfoExtractor):
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
@@ -1782,7 +1993,7 @@ class DepositFilesIE(InfoExtractor):
self.report_download_webpage(file_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err))
return
# Search for the real file URL
@@ -1899,7 +2110,7 @@ class FacebookIE(InfoExtractor):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
return
if useremail is None:
@@ -1919,7 +2130,7 @@ class FacebookIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -1936,7 +2147,7 @@ class FacebookIE(InfoExtractor):
page = urllib2.urlopen(request)
video_webpage = page.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
# Start extracting information
@@ -2070,13 +2281,13 @@ class BlipTVIE(InfoExtractor):
'urlhandle': urlh
}
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if info is None: # Regular URL
try:
json_code = urlh.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err))
return
try:
@@ -2144,7 +2355,7 @@ class MyVideoIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
@@ -2179,6 +2390,25 @@ class ComedyCentralIE(InfoExtractor):
_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
IE_NAME = u'comedycentral'
# Format ids this extractor knows about, listed from the largest to the
# smallest frame size (see _video_dimensions).
# NOTE(review): the ids look like bitrates - confirm
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
# File extension per format id; _print_formats falls back to 'mp4' for
# ids missing here.
_video_extensions = {
'3500': 'mp4',
'2200': 'mp4',
'1700': 'mp4',
'1200': 'mp4',
'750': 'mp4',
'400': 'mp4',
}
# Frame size (WIDTHxHEIGHT) per format id; _print_formats shows '???' for
# ids missing here.
_video_dimensions = {
'3500': '1280x720',
'2200': '960x540',
'1700': '768x432',
'1200': '640x360',
'750': '512x288',
'400': '384x216',
}
def report_extraction(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
@@ -2191,6 +2421,13 @@ class ComedyCentralIE(InfoExtractor):
def report_player_url(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
def _print_formats(self, formats):
    """Print a heading, then one line per entry of formats.

    Each line shows the format id, its file extension ('mp4' if unknown)
    and its frame size ('???' if unknown).
    """
    print('Available formats:')
    for fmt in formats:
        extension = self._video_extensions.get(fmt, 'mp4')
        dimensions = self._video_dimensions.get(fmt, '???')
        print('%s\t:\t%s\t[%s]' % (fmt, extension, dimensions))
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
@@ -2231,10 +2468,19 @@ class ComedyCentralIE(InfoExtractor):
epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
if len(mMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
if len(mMovieParams) == 0:
# The Colbert Report embeds the information in a data-mgid attribute,
# without a URL prefix; so extract the alternate reference
# and then add the URL prefix manually.
altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
if len(altMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
playerUrl_raw = mMovieParams[0][0]
self.report_player_url(epTitle)
try:
@@ -2283,10 +2529,31 @@ class ComedyCentralIE(InfoExtractor):
if len(turls) == 0:
self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
continue
if self._downloader.params.get('listformats', None):
self._print_formats([i[0] for i in turls])
return
# For now, just pick the highest bitrate
format,video_url = turls[-1]
# Get the format arg from the arg stream
req_format = self._downloader.params.get('format', None)
# Select format if we can find one
for f,v in turls:
if f == req_format:
format, video_url = f, v
break
# Patch to download from alternative CDN, which does not
# break on current RTMPDump builds
broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
if video_url.startswith(broken_cdn):
video_url = video_url.replace(broken_cdn, better_cdn)
effTitle = showId + u'-' + epTitle
info = {
'id': shortMediaId,
@@ -2298,7 +2565,7 @@ class ComedyCentralIE(InfoExtractor):
'format': format,
'thumbnail': None,
'description': officialTitle,
'player_url': playerUrl
'player_url': None #playerUrl
}
results.append(info)
@@ -2406,7 +2673,7 @@ class CollegeHumorIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
@@ -2425,7 +2692,7 @@ class CollegeHumorIE(InfoExtractor):
try:
metaXml = urllib2.urlopen(xmlUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
return
mdoc = xml.etree.ElementTree.fromstring(metaXml)
@@ -2471,7 +2738,7 @@ class XVideosIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
@@ -2557,7 +2824,7 @@ class SoundcloudIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
self.report_extraction('%s/%s' % (uploader, slug_title))
@@ -2584,7 +2851,7 @@ class SoundcloudIE(InfoExtractor):
mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
if mobj:
description = mobj.group(1)
# upload date
upload_date = None
mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
@@ -2592,7 +2859,7 @@ class SoundcloudIE(InfoExtractor):
try:
upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
except Exception, e:
self._downloader.to_stderr(str(e))
self._downloader.to_stderr(compat_str(e))
# for soundcloud, a request to a cross domain is required for cookies
request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
@@ -2636,7 +2903,7 @@ class InfoQIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
self.report_extraction(url)
@@ -2722,15 +2989,15 @@ class MixcloudIE(InfoExtractor):
return None
def _print_formats(self, formats):
print 'Available formats:'
print('Available formats:')
for fmt in formats.keys():
for b in formats[fmt]:
try:
ext = formats[fmt][b][0]
print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
except TypeError: # we have no bitrate info
ext = formats[fmt][0]
print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
break
def _real_extract(self, url):
@@ -2750,7 +3017,7 @@ class MixcloudIE(InfoExtractor):
self.report_download_json(file_url)
jsonData = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err))
return
# parse JSON
@@ -2934,7 +3201,7 @@ class MTVIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
@@ -2967,7 +3234,7 @@ class MTVIE(InfoExtractor):
try:
metadataXml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err))
return
mdoc = xml.etree.ElementTree.fromstring(metadataXml)
@@ -3054,7 +3321,7 @@ class YoukuIE(InfoExtractor):
self.report_download_webpage(video_id)
jsondata = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
@@ -3230,7 +3497,7 @@ class GooglePlusIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
return
# Extract update date
@@ -3272,7 +3539,7 @@ class GooglePlusIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extract_vid_page(video_page)

View File

@@ -73,7 +73,7 @@ class FFmpegExtractAudioPP(PostProcessor):
def detect_executables():
def executable(exe):
try:
subprocess.check_output([exe, '-version'])
subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
except OSError:
return False
return exe
@@ -146,7 +146,7 @@ class FFmpegExtractAudioPP(PostProcessor):
if int(self._preferredquality) < 10:
more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
else:
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality]
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
else:
# We convert the audio (lossy)
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
@@ -156,7 +156,7 @@ class FFmpegExtractAudioPP(PostProcessor):
if int(self._preferredquality) < 10:
more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
else:
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality]
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
if self._preferredcodec == 'aac':
more_opts += ['-f', 'adts']
if self._preferredcodec == 'm4a':

View File

@@ -1,6 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import with_statement
__authors__ = (
'Ricardo Garcia Gonzalez',
'Danny Colligan',
@@ -16,10 +18,11 @@ __authors__ = (
'Ori Avtalion',
'shizeeg',
'Filippo Valsorda',
'Christian Albrecht',
)
__license__ = 'Public Domain'
__version__ = '2012.10.09'
__version__ = '2012.11.29'
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
@@ -46,7 +49,7 @@ from PostProcessor import *
def updateSelf(downloader, filename):
''' Update the program file with the latest version from the repository '''
# Note: downloader only used for options
if not os.access(filename, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % filename)
@@ -64,7 +67,7 @@ def updateSelf(downloader, filename):
directory = os.path.dirname(exe)
if not os.access(directory, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % directory)
try:
urlh = urllib2.urlopen(UPDATE_URL_EXE)
newcontent = urlh.read()
@@ -73,20 +76,18 @@ def updateSelf(downloader, filename):
outf.write(newcontent)
except (IOError, OSError), err:
sys.exit('ERROR: unable to download latest version')
try:
bat = os.path.join(directory, 'youtube-dl-updater.bat')
b = open(bat, 'w')
print >> b, """
b.write("""
echo Updating youtube-dl...
ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s"
del "%s"
""" %(exe, exe, bat)
\n""" %(exe, exe, bat))
b.close()
os.startfile(bat)
except (IOError, OSError), err:
sys.exit('ERROR: unable to overwrite current version')
@@ -187,6 +188,11 @@ def parseOpts():
dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
general.add_option('-R', '--retries',
dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
general.add_option('--buffer-size',
dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
general.add_option('--no-resize-buffer',
action='store_true', dest='noresizebuffer',
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
general.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False)
@@ -263,13 +269,18 @@ def parseOpts():
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name', default=False)
filesystem.add_option('--id',
action='store_true', dest='useid', help='use video ID in file name', default=False)
filesystem.add_option('-l', '--literal',
action='store_true', dest='useliteral', help='use literal title in file name', default=False)
action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
filesystem.add_option('-A', '--auto-number',
action='store_true', dest='autonumber',
help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output',
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites',
@@ -294,7 +305,7 @@ def parseOpts():
help='write video metadata to a .info.json file', default=False)
postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
@@ -328,6 +339,7 @@ def gen_extractors():
"""
return [
YoutubePlaylistIE(),
YoutubeChannelIE(),
YoutubeUserIE(),
YoutubeSearchIE(),
YoutubeIE(),
@@ -356,7 +368,7 @@ def gen_extractors():
YoukuIE(),
XNXXIE(),
GooglePlusIE(),
ArteTvIE(),
GenericIE()
]
@@ -421,10 +433,10 @@ def _real_main():
parser.error(u'using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None:
parser.error(u'account username missing')
if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
parser.error(u'using output template conflicts with using title, literal title or auto number')
if opts.usetitle and opts.useliteral:
parser.error(u'using title conflicts with using literal title')
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
parser.error(u'using output template conflicts with using title, video ID or auto number')
if opts.usetitle and opts.useid:
parser.error(u'using title conflicts with using video ID')
if opts.username is not None and opts.password is None:
opts.password = getpass.getpass(u'Type account password and press return:')
if opts.ratelimit is not None:
@@ -437,6 +449,11 @@ def _real_main():
opts.retries = long(opts.retries)
except (TypeError, ValueError), err:
parser.error(u'invalid retry count specified')
if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
if numeric_buffersize is None:
parser.error(u'invalid buffer size specified')
opts.buffersize = numeric_buffersize
try:
opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0:
@@ -475,19 +492,20 @@ def _real_main():
'format_limit': opts.format_limit,
'listformats': opts.listformats,
'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(id)s.%(ext)s'),
'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors,
'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites,
'retries': opts.retries,
'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer,
'continuedl': opts.continue_dl,
'noprogress': opts.noprogress,
'playliststart': opts.playliststart,
@@ -527,7 +545,7 @@ def _real_main():
parser.error(u'you must provide at least one URL')
else:
sys.exit()
try:
retcode = fd.download(all_urls)
except MaxDownloadsReached:

View File

@@ -26,6 +26,11 @@ std_headers = {
'Accept-Language': 'en-us,en;q=0.5',
}
# compat_str is the text string type to use for formatting messages:
# `unicode` where that builtin exists, plain `str` otherwise.
try:
compat_str = unicode # Python 2
except NameError:
compat_str = str # no `unicode` builtin here
def preferredencoding():
"""Get preferred encoding.
@@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser):
HTMLParser.HTMLParser.__init__(self)
def error(self, message):
print >> sys.stderr, self.getpos()
if self.error_count > 10 or self.started:
raise HTMLParser.HTMLParseError(message, self.getpos())
self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
@@ -190,14 +194,36 @@ def timeconvert(timestr):
if timetuple is not None:
timestamp = email.utils.mktime_tz(timetuple)
return timestamp
def sanitize_filename(s, restricted=False):
    """Sanitizes a string so it could be used as part of a filename.

    If restricted is set, use a stricter subset of allowed characters:
    non-ASCII characters, whitespace, '!', '&' and "'" become '_' as well.

    Question marks and control characters are dropped, a double quote
    becomes a single quote (or is dropped when restricted), a colon becomes
    ' -' ('_-' when restricted) and the characters \\ / | * < > become '_'.
    Runs of underscores are collapsed and underscores at either end are
    removed. The result is never empty: '_' is returned if nothing is left.
    """
    def replace_insane(char):
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    result = u''.join(replace_insane(char) for char in s)
    # neighbouring replacements must not pile up into runs of underscores
    while '__' in result:
        result = result.replace('__', '_')
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if not result:
        result = '_'
    return result
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
@@ -290,7 +316,7 @@ class ContentTooShortError(Exception):
class Trouble(Exception):
"""Trouble helper exception
This is an exception to be handled with
FileDownloader.trouble
"""