From 1a9b9649fbad88ef9f264d2f0e1804ba70c11469 Mon Sep 17 00:00:00 2001 From: Tim Sogard Date: Sun, 27 Jul 2014 17:14:29 -0700 Subject: Check load_more_widget_html for feed paging --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c48d1b8ef..cd35a1620 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1325,6 +1325,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): u'%s feed' % self._FEED_NAME, u'Downloading page %s' % i) feed_html = info.get('feed_html') or info.get('content_html') + load_more_widget_html = info.get('load_more_widget_html') or feed_html m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) ids = orderedSet(m.group(1) for m in m_ids) feed_entries.extend( @@ -1332,7 +1333,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): for video_id in ids) mobj = re.search( r'data-uix-load-more-href="/?[^"]+paging=(?P\d+)', - feed_html) + load_more_widget_html) if mobj is None: break paging = mobj.group('paging') -- cgit v1.2.3 From 09334400861bdd8600e68f555eea8d4d6f3c7155 Mon Sep 17 00:00:00 2001 From: Lovius Date: Sat, 6 Sep 2014 09:36:14 +0200 Subject: added extractor and __init__ loader for belgium local tv site telemb.be --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/telemb.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 youtube_dl/extractor/telemb.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7adca7df9..2ae656a4c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -342,6 +342,7 @@ from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .telemb import TelembIE from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE diff --git a/youtube_dl/extractor/telemb.py b/youtube_dl/extractor/telemb.py new file mode 100644 index 000000000..383c26d96 --- /dev/null +++ b/youtube_dl/extractor/telemb.py @@ -0,0 +1,40 @@ +import re +# -*- coding: utf-8 -*- +# needed for the title french ê! coding utf-8- -*- +# based on the vine.co and lots of help from https://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/ +from .common import InfoExtractor + + +class TelembIE(InfoExtractor): + + _VALID_URL = r'https?://www\.telemb\.be/(?P.*)' + + _TEST = { + u'url': u'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html', + u'file': u'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html.mp4', + u'md5': u'f45ea69878516ba039835794e0f8f783', + u'info_dict': { + u"title": u'TéléMB : Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage_url = 'http://www.telemb.be/' + video_id + webpage = self._download_webpage(webpage_url, video_id) + + + self.report_extraction(video_id) + + video_url = self._html_search_regex(r'"(http://wowza\.imust\.org/srv/vod/.*\.mp4)"', + webpage, u'video URL') + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': self._og_search_title(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + }] -- cgit v1.2.3 From 8e20f81c5b825093f8165d9ff977fcab5fc41d23 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sat, 6 Sep 2014 18:59:15 +0300 Subject: [sharesix] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/sharesix.py | 91 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 youtube_dl/extractor/sharesix.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7adca7df9..c76fb3727 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -296,6 +296,7 @@ from .scivee import SciVeeIE from .screencast import ScreencastIE from .servingsys import ServingSysIE from .shared import SharedIE +from .sharesix import ShareSixIE from .sina import SinaIE from .slideshare import SlideshareIE from .slutload import SlutloadIE diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py new file mode 100644 index 000000000..7531e8325 --- /dev/null +++ b/youtube_dl/extractor/sharesix.py @@ -0,0 +1,91 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urllib_request, + parse_duration, +) + + +class ShareSixIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P[0-9a-zA-Z]+)' + _TESTS = [ + { + 'url': 'http://sharesix.com/f/OXjQ7Y6', + 'md5': '9e8e95d8823942815a7d7c773110cc93', + 'info_dict': { + 'id': 'OXjQ7Y6', + 'ext': 'mp4', + 'title': 'big_buck_bunny_480p_surround-fix.avi', + 'duration': 596, + 'width': 854, + 'height': 480, + }, + }, + { + 'url': 'http://sharesix.com/lfrwoxp35zdd', + 'md5': 'dd19f1435b7cec2d7912c64beeee8185', + 'info_dict': { + 'id': 'lfrwoxp35zdd', + 'ext': 'flv', + 'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv', + 'duration': 65, + 'width': 1280, + 'height': 720, + }, + } + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + fields = { + 'method_free': 'Free' + } + post = compat_urllib_parse.urlencode(fields) + req = compat_urllib_request.Request(url, post) + req.add_header('Content-type', 'application/x-www-form-urlencoded') + + webpage = self._download_webpage(req, video_id, + 'Downloading video page') + + video_url = self._search_regex( + r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL') + title = self._html_search_regex( + r'(?s)
Filename:
.+?
(.+?)
', webpage, 'title') + duration = parse_duration( + self._search_regex( + r'(?s)
Length:
.+?
(.+?)
', + webpage, + 'duration', + fatal=False + ) + ) + + m = re.search( + r'''(?xs)
Width\sx\sHeight
.+? +
(?P\d+)\sx\s(?P\d+)
''', + webpage + ) + width = height = None + if m: + width, height = int(m.group('width')), int(m.group('height')) + + formats = [{ + 'format_id': 'sd', + 'url': video_url, + 'width': width, + 'height': height, + }] + + return { + 'id': video_id, + 'title': title, + 'duration': duration, + 'formats': formats, + } -- cgit v1.2.3 From 5fb9077e8ca2e8bbab5f9e1c57747ff44296fe62 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 7 Sep 2014 01:21:58 +0300 Subject: [moevideo] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/moevideo.py | 112 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 youtube_dl/extractor/moevideo.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7adca7df9..0ea28dc5b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -200,6 +200,7 @@ from .mitele import MiTeleIE from .mixcloud import MixcloudIE from .mlb import MLBIE from .mpora import MporaIE +from .moevideo import MoeVideoIE from .mofosex import MofosexIE from .mojvideo import MojvideoIE from .mooshare import MooshareIE diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py new file mode 100644 index 000000000..d2a73fdab --- /dev/null +++ b/youtube_dl/extractor/moevideo.py @@ -0,0 +1,112 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_parse, + compat_urllib_request, + int_or_none, +) + + +class MoeVideoIE(InfoExtractor): + IE_DESC = 'moevideo.net and playreplay.net' + _VALID_URL = r'''(?x) + https?://(?P(?:www\.)? + (?:moevideo\.net|playreplay\.net))/ + (?:video|framevideo)/(?P[0-9]+\.[0-9A-Za-z]+)''' + _API_URL = 'http://api.letitbit.net/' + _API_KEY = 'tVL0gjqo5' + _TESTS = [ + { + 'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29', + 'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a', + 'info_dict': { + 'id': '00297.0036103fe3d513ef27915216fd29', + 'ext': 'flv', + 'title': 'Sink cut out machine', + 'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8', + 'thumbnail': 're:^https?://.*\.jpg$', + 'width': 540, + 'height': 360, + 'duration': 179, + 'filesize_approx': 17822500, + } + }, + { + 'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a', + 'md5': '74f0a014d5b661f0f0e2361300d1620e', + 'info_dict': { + 'id': '77107.7f325710a627383d40540d8e991a', + 'ext': 'flv', + 'title': 'Operacion Condor.', + 'description': 'md5:7e68cb2fcda66833d5081c542491a9a3', + 'thumbnail': 're:^https?://.*\.jpg$', + 'width': 480, + 'height': 296, + 'duration': 6027, + 'filesize_approx': 588257923, + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage( + 'http://%s/video/%s' % (mobj.group('host'), video_id), + video_id, 'Downloading webpage') + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage) + + r = [ + self._API_KEY, + [ + 'preview/flv_link', + { + 'uid': video_id, + }, + ], + ] + r_json = json.dumps(r) + post = compat_urllib_parse.urlencode({'r': r_json}) + req = compat_urllib_request.Request(self._API_URL, post) + req.add_header('Content-type', 'application/x-www-form-urlencoded') + + response = self._download_json(req, video_id) + if response['status'] != 'OK': + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, response['data']), + expected=True + ) + item = response['data'][0] + video_url = item['link'] + duration = int_or_none(item['length']) + width = int_or_none(item['width']) + height = int_or_none(item['height']) + filesize = int_or_none(item['convert_size']) + + formats = [{ + 'format_id': 'sd', + 'http_headers': {'Range': 'bytes=0-'}, # Required to download + 'url': video_url, + 'width': width, + 'height': height, + 'filesize_approx': filesize, + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + 'formats': formats, + } -- cgit v1.2.3 From ff0ba8ce0fbfd099f85aa70480596f3831e02725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Sep 2014 21:04:57 +0700 Subject: [moevideo] Add videochart.net to _VALID_URL --- youtube_dl/extractor/moevideo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index d2a73fdab..7dfa35a3f 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -14,10 +14,10 @@ from ..utils import ( class MoeVideoIE(InfoExtractor): - IE_DESC = 'moevideo.net and playreplay.net' + IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net' _VALID_URL = r'''(?x) https?://(?P(?:www\.)? - (?:moevideo\.net|playreplay\.net))/ + (?:(?:moevideo|playreplay|videochart)\.net))/ (?:video|framevideo)/(?P[0-9]+\.[0-9A-Za-z]+)''' _API_URL = 'http://api.letitbit.net/' _API_KEY = 'tVL0gjqo5' -- cgit v1.2.3 From 0dc5365564d06451e6c90fb1b8653c8429559785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Sep 2014 21:48:10 +0700 Subject: [moevideo] Make filesize exact --- youtube_dl/extractor/moevideo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 7dfa35a3f..2ff79b9b8 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -34,7 +34,7 @@ class MoeVideoIE(InfoExtractor): 'width': 540, 'height': 360, 'duration': 179, - 'filesize_approx': 17822500, + 'filesize': 17822500, } }, { @@ -49,7 +49,7 @@ class MoeVideoIE(InfoExtractor): 'width': 480, 'height': 296, 'duration': 6027, - 'filesize_approx': 588257923, + 'filesize': 588257923, } }, ] @@ -99,7 +99,7 @@ class MoeVideoIE(InfoExtractor): 'url': video_url, 'width': width, 'height': height, - 'filesize_approx': filesize, + 'filesize': filesize, }] return { -- cgit v1.2.3 From 68477e88399ce102da3485e03e67d104b051f959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Sep 2014 22:09:32 +0700 Subject: [khanacademy] Improve _VALID_URL (Closes #3695) --- youtube_dl/extractor/khanacademy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py index 04bac7517..408d00944 100644 --- a/youtube_dl/extractor/khanacademy.py +++ b/youtube_dl/extractor/khanacademy.py @@ -9,7 +9,7 @@ from ..utils import ( class KhanAcademyIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P[^/]+)/(?:[^/]+/){,2}(?P[^?#/]+)(?:$|[?#])' + _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P[^/]+)/(?:[^/]+/){,2}(?P[^?#/]+)(?:$|[?#])' IE_NAME = 'KhanAcademy' _TESTS = [{ -- cgit v1.2.3 From f009f19ece7ba46d643d1897711efd73ec274020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Sep 2014 01:09:04 +0700 Subject: [rutv] Fix rutv player regex --- youtube_dl/extractor/rutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index 6c5f5a680..f737b4e5f 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -100,7 +100,7 @@ class RUTVIE(InfoExtractor): return mobj.group('url') mobj = re.search( - r']+?property=(["\'])og:video\1[^>]+?content=(["\'])(?Phttp://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)', + r']+?property=(["\'])og:video\1[^>]+?content=(["\'])(?Phttps?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)', webpage) if mobj: return mobj.group('url') -- cgit v1.2.3 From c63b30901bb9135cd346602c44f14a54ba57a786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Sep 2014 01:24:05 +0700 Subject: [noco] Add authenticate support (Closes #3706) --- youtube_dl/extractor/noco.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index da203538d..959fdf590 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -5,7 +5,10 @@ import re from .common import InfoExtractor from ..utils import ( + compat_urllib_request, + compat_urllib_parse, ExtractorError, + clean_html, unified_strdate, compat_str, ) @@ -13,6 +16,8 @@ from ..utils import ( class NocoIE(InfoExtractor): _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P\d+)' + _LOGIN_URL = 'http://noco.tv/do.php' + _NETRC_MACHINE = 'noco' _TEST = { 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', @@ -30,6 +35,28 @@ class NocoIE(InfoExtractor): 'skip': 'Requires noco account', } + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + login_form = { + 'a': 'login', + 'cookie': '1', + 'username': username, + 'password': password, + } + request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) + request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') + + login = self._download_json(request, None, 'Logging in as %s' % username) + + if 'erreur' in login: + raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') -- cgit v1.2.3 From 91ebb17ede283208f71969db6f20fafb345fc923 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Tue, 9 Sep 2014 22:17:33 +0300 Subject: [hostingbulk] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/hostingbulk.py | 88 +++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 youtube_dl/extractor/hostingbulk.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5e7a0a775..6b29e9fdd 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -140,6 +140,7 @@ from .hark import HarkIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE from .hornbunny import HornBunnyIE +from .hostingbulk import HostingBulkIE from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py new file mode 100644 index 000000000..a07dd4962 --- /dev/null +++ b/youtube_dl/extractor/hostingbulk.py @@ -0,0 +1,88 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_request, + int_or_none, + urlencode_postdata, +) + + +class HostingBulkIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?:www\.)?hostingbulk\.com/ + (?:embed-)?(?P[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html''' + _FILE_DELETED_REGEX = r'File Not Found' + _TEST = { + 'url': 'http://hostingbulk.com/n0ulw1hv20fm.html', + 'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f', + 'info_dict': { + 'id': 'n0ulw1hv20fm', + 'ext': 'mp4', + 'title': 'md5:5afeba33f48ec87219c269e054afd622', + 'filesize': 6816081, + 'thumbnail': 're:^http://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://hostingbulk.com/{0:}.html'.format(video_id) + + # Custom request with cookie to set language to English, so our file + # deleted regex would work. + request = compat_urllib_request.Request( + url, headers={'Cookie': 'lang=english'}) + webpage = self._download_webpage(request, video_id) + + if re.search(self._FILE_DELETED_REGEX, webpage) is not None: + raise ExtractorError('Video %s does not exist' % video_id, + expected=True) + + title = self._html_search_regex(r'

(.*?)

', webpage, 'title') + filesize = int_or_none( + self._search_regex( + r'\((\d+)\sbytes?\)', + webpage, + 'filesize', + fatal=False + ) + ) + thumbnail = self._search_regex( + r'', webpage, 'rand') + + fields = { + 'id': video_id, + 'method_free': '', + 'method_premium': '', + 'op': 'download2', + 'rand': rand, + 'referer': '', + } + request = compat_urllib_request.Request(url, urlencode_postdata(fields)) + request.add_header('Content-type', 'application/x-www-form-urlencoded') + response = self._request_webpage(request, video_id, + 'Submiting download request') + video_url = response.geturl() + + formats = [{ + 'format_id': 'sd', + 'filesize': filesize, + 'url': video_url, + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } -- cgit v1.2.3 From f1d15e6dbc194309a970174461799e1d9e7cf9a1 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Wed, 10 Sep 2014 00:22:48 +0300 Subject: [izlesene] Adapt to website changes and improve --- youtube_dl/extractor/izlesene.py | 90 ++++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py index 79e8430b5..f0953c545 100644 --- a/youtube_dl/extractor/izlesene.py +++ b/youtube_dl/extractor/izlesene.py @@ -14,24 +14,45 @@ from ..utils import ( class IzleseneIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P[0-9]+)' + _VALID_URL = r'''(?x) + https?://(?:(?:www|m)\.)?izlesene\.com/ + (?:video|embedplayer)/(?:[^/]+/)?(?P[0-9]+) + ''' _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}' - _TEST = { - 'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694', - 'md5': '4384f9f0ea65086734b881085ee05ac2', - 'info_dict': { - 'id': '7599694', - 'ext': 'mp4', - 'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi', - 'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor', - 'thumbnail': 're:^http://.*\.jpg', - 'uploader_id': 'pelikzzle', - 'timestamp': 1404298698, - 'upload_date': '20140702', - 'duration': 95.395, - 'age_limit': 0, - } - } + _TESTS = [ + { + 'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694', + 'md5': '4384f9f0ea65086734b881085ee05ac2', + 'info_dict': { + 'id': '7599694', + 'ext': 'mp4', + 'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi', + 'description': 'md5:253753e2655dde93f59f74b572454f6d', + 'thumbnail': 're:^http://.*\.jpg', + 'uploader_id': 'pelikzzle', + 'timestamp': 1404298698, + 'upload_date': '20140702', + 'duration': 95.395, + 'age_limit': 0, + } + }, + { + 'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997', + 'md5': '97f09b6872bffa284cb7fa4f6910cb72', + 'info_dict': { + 'id': '17997', + 'ext': 'mp4', + 'title': 'Tarkan Dortmund 2006 Konseri', + 'description': 'Tarkan Dortmund 2006 Konseri', + 'thumbnail': 're:^http://.*\.jpg', + 'uploader_id': 'parlayankiz', + 'timestamp': 1163318593, + 'upload_date': '20061112', + 'duration': 253.666, + 'age_limit': 0, + } + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -45,18 +66,21 @@ class IzleseneIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( - r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='') + r"adduserUsername\s*=\s*'([^']+)';", + webpage, 'uploader', fatal=False, default='') timestamp = parse_iso8601(self._html_search_meta( 'uploadDate', webpage, 'upload date', fatal=False)) duration = int_or_none(self._html_search_regex( - r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False)) + r'"videoduration"\s*:\s*"([^"]+)"', + webpage, 'duration', fatal=False)) if duration: duration /= 1000.0 view_count = str_to_int(get_element_by_id('videoViewCount', webpage)) comment_count = self._html_search_regex( - r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False) + r'comment_count\s*=\s*\'([^\']+)\';', + webpage, 'comment_count', fatal=False) family_friendly = self._html_search_meta( 'isFamilyFriendly', webpage, 'age limit', fatal=False) @@ -66,20 +90,26 @@ class IzleseneIE(InfoExtractor): ext = determine_ext(content_url, 'mp4') # Might be empty for some videos. - qualities = self._html_search_regex( - r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='') + streams = self._html_search_regex( + r'"qualitylevel"\s*:\s*"([^"]+)"', + webpage, 'streams', fatal=False, default='') formats = [] - for quality in qualities.split('|'): - json = self._download_json( - self._STREAM_URL.format(id=video_id, format=quality), video_id, - note='Getting video URL for "%s" quality' % quality, - errnote='Failed to get video URL for "%s" quality' % quality - ) + if streams: + for stream in streams.split('|'): + quality, url = re.search(r'\[(\w+)\](.+)', stream).groups() + formats.append({ + 'format_id': '%sp' % quality if quality else 'sd', + 'url': url, + 'ext': ext, + }) + else: + stream_url = self._search_regex( + r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL') formats.append({ - 'url': json.get('streamurl'), + 'format_id': 'sd', + 'url': stream_url, 'ext': ext, - 'format_id': '%sp' % quality if quality else 'sd', }) return { -- cgit v1.2.3 From 8e9da53140025eda3af92ce0adc5a886901ef58a Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Wed, 10 Sep 2014 00:24:48 +0300 Subject: [izlesene] Remove API URL --- youtube_dl/extractor/izlesene.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py index f0953c545..17319e709 100644 --- a/youtube_dl/extractor/izlesene.py +++ b/youtube_dl/extractor/izlesene.py @@ -18,7 +18,6 @@ class IzleseneIE(InfoExtractor): https?://(?:(?:www|m)\.)?izlesene\.com/ (?:video|embedplayer)/(?:[^/]+/)?(?P[0-9]+) ''' - _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}' _TESTS = [ { 'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694', -- cgit v1.2.3 From bc7ff0a8ddd9de9a242d98915a7487241215bb67 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 10 Sep 2014 12:26:39 +0200 Subject: release 2014.09.10 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ecc2e0f53..986753f16 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.06' +__version__ = '2014.09.10' -- cgit v1.2.3 From 2c5c1f48e9cec92e531af074276eca1f48b1f153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Sep 2014 18:24:57 +0700 Subject: [izlesene] Simplify --- youtube_dl/extractor/izlesene.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py index 17319e709..a83dd249f 100644 --- a/youtube_dl/extractor/izlesene.py +++ b/youtube_dl/extractor/izlesene.py @@ -9,6 +9,7 @@ from ..utils import ( parse_iso8601, determine_ext, int_or_none, + float_or_none, str_to_int, ) @@ -70,11 +71,9 @@ class IzleseneIE(InfoExtractor): timestamp = parse_iso8601(self._html_search_meta( 'uploadDate', webpage, 'upload date', fatal=False)) - duration = int_or_none(self._html_search_regex( + duration = float_or_none(self._html_search_regex( r'"videoduration"\s*:\s*"([^"]+)"', - webpage, 'duration', fatal=False)) - if duration: - duration /= 1000.0 + webpage, 'duration', fatal=False), scale=1000) view_count = str_to_int(get_element_by_id('videoViewCount', webpage)) comment_count = self._html_search_regex( -- cgit v1.2.3 From 1bf5423e82d17ebaa496865cfcfa845b4257e13e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Sep 2014 18:29:20 +0700 Subject: [generic/youtube] Recognize youtube nocookie embeds (Closes #3713) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1b7697870..2bfa20606 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -628,7 +628,7 @@ class GenericIE(InfoExtractor): embedSWF\(?:\s* ) (["\']) - (?P(?:https?:)?//(?:www\.)?youtube\.com/ + (?P(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ (?:embed|v)/.+?) \1''', webpage) if matches: -- cgit v1.2.3 From 6899f2fe9ed2347255f216cfcfc954dd716e5e14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Sep 2014 19:13:22 +0700 Subject: [hostingbulk] Simplify --- youtube_dl/extractor/hostingbulk.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py index a07dd4962..8e812b669 100644 --- a/youtube_dl/extractor/hostingbulk.py +++ b/youtube_dl/extractor/hostingbulk.py @@ -57,17 +57,13 @@ class HostingBulkIE(InfoExtractor): thumbnail = self._search_regex( r'', webpage, 'rand') - fields = { - 'id': video_id, - 'method_free': '', - 'method_premium': '', - 'op': 'download2', - 'rand': rand, - 'referer': '', - } + fields = dict(re.findall(r'''(?x) Date: Wed, 10 Sep 2014 16:39:23 +0200 Subject: release 2014.09.10.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 986753f16..680107346 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.10' +__version__ = '2014.09.10.1' -- cgit v1.2.3 From 6024b0f25e4a27372a325151630c9c3d17cc7c4f Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Wed, 10 Sep 2014 18:10:21 +0300 Subject: [nosvideo] Check for deleted videos --- youtube_dl/extractor/nosvideo.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dl/extractor/nosvideo.py index 095965add..79bdba40a 100644 --- a/youtube_dl/extractor/nosvideo.py +++ b/youtube_dl/extractor/nosvideo.py @@ -5,8 +5,9 @@ import re from .common import InfoExtractor from ..utils import ( - compat_urllib_parse, + ExtractorError, compat_urllib_request, + urlencode_postdata, xpath_with_ns, ) @@ -18,6 +19,7 @@ class NosVideoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \ '(?:embed/|\?v=)(?P[A-Za-z0-9]{12})/?' _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml' + _FILE_DELETED_REGEX = r'File Not Found' _TEST = { 'url': 'http://nosvideo.com/?v=drlp6s40kg54', 'md5': '4b4ac54c6ad5d70ab88f2c2c6ccec71c', @@ -38,11 +40,14 @@ class NosVideoIE(InfoExtractor): 'op': 'download1', 'method_free': 'Continue to Video', } - post = compat_urllib_parse.urlencode(fields) - req = compat_urllib_request.Request(url, post) + req = compat_urllib_request.Request(url, urlencode_postdata(fields)) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage(req, video_id, 'Downloading download page') + if re.search(self._FILE_DELETED_REGEX, webpage) is not None: + raise ExtractorError('Video %s does not exist' % video_id, + expected=True) + xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID') playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id) playlist = self._download_xml(playlist_url, video_id) -- cgit v1.2.3 From 08d037309ed301a74fbf672ab723a4f3377099de Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Wed, 10 Sep 2014 18:19:59 +0300 Subject: [nosvideo] Update test --- youtube_dl/extractor/nosvideo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dl/extractor/nosvideo.py index 79bdba40a..8c2c428fc 100644 --- a/youtube_dl/extractor/nosvideo.py +++ b/youtube_dl/extractor/nosvideo.py @@ -21,10 +21,10 @@ class NosVideoIE(InfoExtractor): _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml' _FILE_DELETED_REGEX = r'File Not Found' _TEST = { - 'url': 'http://nosvideo.com/?v=drlp6s40kg54', - 'md5': '4b4ac54c6ad5d70ab88f2c2c6ccec71c', + 'url': 'http://nosvideo.com/?v=mu8fle7g7rpq', + 'md5': '6124ed47130d8be3eacae635b071e6b6', 'info_dict': { - 'id': 'drlp6s40kg54', + 'id': 'mu8fle7g7rpq', 'ext': 'mp4', 'title': 'big_buck_bunny_480p_surround-fix.avi.mp4', 'thumbnail': 're:^https?://.*\.jpg$', -- cgit v1.2.3 From eef93b09128710d94d53d794ba1206300f72f144 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 11 Sep 2014 00:10:18 +0300 Subject: [tvplay] Support similar services in different countries --- youtube_dl/extractor/tvplay.py | 147 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 145 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index a56a7ab5f..445e0ec41 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -6,13 +6,28 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + compat_str, parse_iso8601, qualities, ) class TVPlayIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P\d+)' + IE_DESC = 'TV3Play and related services' + _VALID_URL = r'''(?x)http://(?:www\.)? + (?:tvplay\.lv/parraides| + tv3play\.lt/programos| + tv3play\.ee/sisu| + tv3play\.se/program| + tv6play\.se/program| + tv8play\.se/program| + tv10play\.se/program| + tv3play\.no/programmer| + viasat4play\.no/programmer| + tv6play\.no/programmer| + tv3play\.dk/programmer| + )/[^/]+/(?P\d+) + ''' _TESTS = [ { 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true', @@ -30,6 +45,134 @@ class TVPlayIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true', + 'info_dict': { + 'id': '409229', + 'ext': 'flv', + 'title': 'Moterys meluoja geriau', + 'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e', + 'duration': 1330, + 'timestamp': 1403769181, + 'upload_date': '20140626', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true', + 'info_dict': { + 'id': '238551', + 'ext': 'flv', + 'title': 'Kodu keset linna 398537', + 'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701', + 'duration': 1257, + 'timestamp': 1292449761, + 'upload_date': '20101215', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true', + 'info_dict': { + 'id': '395385', + 'ext': 'flv', + 'title': 'Husräddarna S02E07', + 'description': 'md5:f210c6c89f42d4fc39faa551be813777', + 'duration': 2574, + 'timestamp': 1400596321, + 'upload_date': '20140520', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true', + 'info_dict': { + 'id': '266636', + 'ext': 'flv', + 'title': 'Den sista dokusåpan S01E08', + 'description': 'md5:295be39c872520221b933830f660b110', + 'duration': 1492, + 'timestamp': 1330522854, + 'upload_date': '20120229', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true', + 'info_dict': { + 'id': '282756', + 'ext': 'flv', + 'title': 'Antikjakten S01E10', + 'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8', + 'duration': 2646, + 'timestamp': 1348575868, + 'upload_date': '20120925', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true', + 'info_dict': { + 'id': '230898', + 'ext': 'flv', + 'title': 'Anna Anka søker assistent - Ep. 8', + 'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474', + 'duration': 2656, + 'timestamp': 1277720005, + 'upload_date': '20100628', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true', + 'info_dict': { + 'id': '21873', + 'ext': 'flv', + 'title': 'Budbringerne program 10', + 'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d', + 'duration': 1297, + 'timestamp': 1254205102, + 'upload_date': '20090929', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true', + 'info_dict': { + 'id': '361883', + 'ext': 'flv', + 'title': 'Hotelinspektør Alex Polizzi - Ep. 10', + 'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81', + 'duration': 2594, + 'timestamp': 1393236292, + 'upload_date': '20140224', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, ] def _real_extract(self, url): @@ -49,7 +192,7 @@ class TVPlayIE(InfoExtractor): quality = qualities(['hls', 'medium', 'high']) formats = [] for format_id, video_url in streams['streams'].items(): - if not video_url: + if not video_url or not isinstance(video_url, compat_str): continue fmt = { 'format_id': format_id, -- cgit v1.2.3 From f401a4173b05ec36f00e240fecc728059fd669b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 11 Sep 2014 21:44:02 +0200 Subject: [youjizz] Don't make the url protocol optional (fixes #3722) It would fail to download the webpag if it's missing. And the generic extractor will add it. --- youtube_dl/extractor/youjizz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index fcb5ff758..b86331e3c 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -9,7 +9,7 @@ from ..utils import ( class YouJizzIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P[^.]+)\.html$' + _VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P[^.]+)\.html$' _TEST = { 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', 'file': '2189178.flv', -- cgit v1.2.3 From edb53e2dc33c37a8c4cef3ec541084171adeed5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 11 Sep 2014 21:47:25 +0200 Subject: [youtube] Don't make the url protocol optional The generic extractor will add it. --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 70f670682..c77f09aac 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -199,7 +199,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): IE_DESC = u'YouTube.com' _VALID_URL = r"""(?x)^ ( - (?:https?://|//)? # http(s):// or protocol-independent URL (optional) + (?:https?://|//) # http(s):// or protocol-independent URL (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| (?:www\.)?deturl\.com/www\.youtube\.com/| (?:www\.)?pwnyoutube\.com/| @@ -217,7 +217,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): ) )) |youtu\.be/ # just youtu.be/xxxx - |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= + |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= ) )? # all until now is optional -> you can pass the naked ID ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID -- cgit v1.2.3 From aa37e3d486f52b8c7a22dd5255469292a6a6afb9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 12 Sep 2014 07:50:31 +0200 Subject: [utils] Default SSL to TLS. (Fixes #3727) On 2.x, we now try TLS first, and fall back to the compat 23 (basically anything) afterwards. On 3.4+, we now use the proper function so that we get all the latest security configurations. We allow SSLv3 though for the time being, since a lot of older pages use that. On 3.3, we default to SSLv23 (basically "anything, including TLS") because that has the widest compatibility. --- youtube_dl/utils.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0bc410e91..d920c65a4 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -617,7 +617,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs): self.sock = sock self._tunnel() try: - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3) + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1) except ssl.SSLError: self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23) @@ -625,8 +625,14 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs): def https_open(self, req): return self.do_open(HTTPSConnectionV3, req) return HTTPSHandlerV3(**kwargs) - else: - context = ssl.SSLContext(ssl.PROTOCOL_SSLv3) + elif hasattr(ssl, 'create_default_context'): # Python >= 3.4 + context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + context.options &= ~ssl.OP_NO_SSLv3 # Allow older, not-as-secure SSLv3 + if opts_no_check_certificate: + context.verify_mode = ssl.CERT_NONE + return compat_urllib_request.HTTPSHandler(context=context, **kwargs) + else: # Python < 3.4 + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) context.verify_mode = (ssl.CERT_NONE if opts_no_check_certificate else ssl.CERT_REQUIRED) -- cgit v1.2.3 From 473219a77888fc733626aa7edc77b2687ac9eed7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 12 Sep 2014 07:56:05 +0200 Subject: release 2014.09.12 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 680107346..7496e9296 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.10.1' +__version__ = '2014.09.12' -- cgit v1.2.3 From adf2c0989d6d525b3a691eb64651b3330f5a76fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Sep 2014 20:51:48 +0700 Subject: [telemb] Extract all formats and modernize --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/telemb.py | 93 ++++++++++++++++++++++++++++------------ 2 files changed, 66 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 94e370281..13b3616d3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -345,7 +345,7 @@ from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE -from .telemb import TelembIE +from .telemb import TeleMBIE from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE diff --git a/youtube_dl/extractor/telemb.py b/youtube_dl/extractor/telemb.py index 383c26d96..cf5bb89b1 100644 --- a/youtube_dl/extractor/telemb.py +++ b/youtube_dl/extractor/telemb.py @@ -1,40 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + import re -# -*- coding: utf-8 -*- -# needed for the title french ê! coding utf-8- -*- -# based on the vine.co and lots of help from https://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/ -from .common import InfoExtractor +from .common import InfoExtractor +from ..utils import remove_start -class TelembIE(InfoExtractor): - _VALID_URL = r'https?://www\.telemb\.be/(?P.*)' - - _TEST = { - u'url': u'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html', - u'file': u'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html.mp4', - u'md5': u'f45ea69878516ba039835794e0f8f783', - u'info_dict': { - u"title": u'TéléMB : Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages' - } - } +class TeleMBIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P.+?)_d_(?P\d+)\.html' + _TESTS = [ + { + 'url': 'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html', + 'md5': 'f45ea69878516ba039835794e0f8f783', + 'info_dict': { + 'id': '13466', + 'display_id': 'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-', + 'ext': 'mp4', + 'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages', + 'description': 'md5:bc5225f47b17c309761c856ad4776265', + 'thumbnail': 're:^http://.*\.(?:jpg|png)$', + } + }, + { + 'url': 'http://telemb.be/les-reportages-havre-incendie-mortel_d_13514.html', + 'md5': '6e9682736e5ccd4eab7f21e855350733', + 'info_dict': { + 'id': '13514', + 'display_id': 'les-reportages-havre-incendie-mortel', + 'ext': 'mp4', + 'title': 'Havré - Incendie mortel - Les reportages', + 'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a', + 'thumbnail': 're:^http://.*\.(?:jpg|png)$', + } + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - webpage_url = 'http://www.telemb.be/' + video_id - webpage = self._download_webpage(webpage_url, video_id) + display_id = mobj.group('display_id') + webpage = self._download_webpage(url, display_id) - self.report_extraction(video_id) + formats = [] + for video_url in re.findall(r'file\s*:\s*"([^"]+)"', webpage): + fmt = { + 'url': video_url, + 'format_id': video_url.split(':')[0] + } + rtmp = re.search(r'^(?Prtmp://[^/]+/(?P.+))/(?Pmp4:.+)$', video_url) + if rtmp: + fmt.update({ + 'play_path': rtmp.group('playpath'), + 'app': rtmp.group('app'), + 'player_url': 'http://p.jwpcdn.com/6/10/jwplayer.flash.swf', + 'page_url': 'http://www.telemb.be', + 'preference': -1, + }) + formats.append(fmt) + self._sort_formats(formats) - video_url = self._html_search_regex(r'"(http://wowza\.imust\.org/srv/vod/.*\.mp4)"', - webpage, u'video URL') + title = remove_start(self._og_search_title(webpage), 'TéléMB : ') + description = self._html_search_regex( + r'', + webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) - return [{ - 'id': video_id, - 'url': video_url, - 'ext': 'mp4', - 'title': self._og_search_title(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - }] + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } -- cgit v1.2.3 From 37419b4f9937f11ed3ca3545a32ed3451eb734ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Sep 2014 23:20:17 +0700 Subject: [YoutubeDL] Escape non-ASCII characters in URLs urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) Working around by replacing request's original URL with escaped one --- youtube_dl/YoutubeDL.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 553bf559b..99cd05e65 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -28,6 +28,7 @@ from .utils import ( compat_str, compat_urllib_error, compat_urllib_request, + compat_urllib_parse_urlparse, ContentTooShortError, date_from_str, DateRange, @@ -1241,6 +1242,31 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ + + # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) + # Working around by replacing request's original URL with escaped one + + url = req if isinstance(req, compat_str) else req.get_full_url() + + def escape(component): + return compat_cookiejar.escape_path(component.encode('utf-8')) + + url_parsed = compat_urllib_parse_urlparse(url) + url_escaped = url_parsed._replace( + path=escape(url_parsed.path), + query=escape(url_parsed.query), + fragment=escape(url_parsed.fragment) + ).geturl() + + # Substitute URL if any change after escaping + if url != url_escaped: + if isinstance(req, compat_str): + req = url_escaped + else: + req = compat_urllib_request.Request( + url_escaped, data=req.data, headers=req.headers, + origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) + return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self): -- cgit v1.2.3 From f24e740bb8e40bcd12dea004ad44104d672b5884 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Sep 2014 23:22:07 +0700 Subject: [telemb] Add comment to test --- youtube_dl/extractor/telemb.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/telemb.py b/youtube_dl/extractor/telemb.py index cf5bb89b1..1bbd0e7bd 100644 --- a/youtube_dl/extractor/telemb.py +++ b/youtube_dl/extractor/telemb.py @@ -23,6 +23,7 @@ class TeleMBIE(InfoExtractor): } }, { + # non-ASCII characters in download URL 'url': 'http://telemb.be/les-reportages-havre-incendie-mortel_d_13514.html', 'md5': '6e9682736e5ccd4eab7f21e855350733', 'info_dict': { -- cgit v1.2.3 From da2e1f53e07bb24405bb9e27c58fa52fe0b58087 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 12 Sep 2014 07:25:58 +0200 Subject: [deezer] Add simple support --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/deezer.py | 80 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 youtube_dl/extractor/deezer.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6b29e9fdd..eba7dc0a2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -68,6 +68,7 @@ from .dailymotion import ( ) from .daum import DaumIE from .dbtv import DBTVIE +from .deezer import DeezerPlaylistIE from .dfb import DFBIE from .dotsub import DotsubIE from .dreisat import DreiSatIE diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py new file mode 100644 index 000000000..62c181529 --- /dev/null +++ b/youtube_dl/extractor/deezer.py @@ -0,0 +1,80 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + orderedSet, +) + + +class DeezerPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?deezer\.com/playlist/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.deezer.com/playlist/176747451', + 'info_dict': { + 'id': '176747451', + 'title': 'Best!', + 'uploader': 'Anonymous', + 'thumbnail': 're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$', + }, + 'playlist_count': 30, + } + + def _real_extract(self, url): + if 'test' not in self._downloader.params: + self._downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!') + + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + + webpage = self._download_webpage(url, playlist_id) + data_json = self._search_regex( + r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n', webpage, 'data JSON') + data = json.loads(data_json) + + playlist_title = data.get('DATA', {}).get('TITLE') + playlist_uploader = data.get('DATA', {}).get('PARENT_USERNAME') + playlist_thumbnail = self._search_regex( + r' Date: Sat, 13 Sep 2014 05:54:45 +0200 Subject: [README] Add old Ubuntu versions to FAQ --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index ca366039e..5cc959ac5 100644 --- a/README.md +++ b/README.md @@ -345,6 +345,25 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 # FAQ +### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists + +YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos. + +If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version. + +Alternatively, uninstall the youtube-dl package and follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html). In a pinch, this should do if you used `apt-get` before to install youtube-dl: + +``` +sudo apt-get remove -y youtube-dl +sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl +sudo chmod a+x /usr/local/bin/youtube-dl +hash -r +``` + +### Do I always have to pass in `--max-quality FORMAT`, or `-citw`? + +By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`. + ### Can you please put the -b option back? Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. -- cgit v1.2.3 From 89fb6a979765689a744e5e5bfbf3b397ed40b33a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 13 Sep 2014 06:55:38 +0200 Subject: [spiegel] Add support for articles --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/spiegel.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4423f78a5..f1a5b2608 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -325,7 +325,7 @@ from .southpark import ( ) from .space import SpaceIE from .spankwire import SpankwireIE -from .spiegel import SpiegelIE +from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE from .spike import SpikeIE from .sportdeutschland import SportDeutschlandIE diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 340a38440..a3b89f751 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import compat_urlparse class SpiegelIE(InfoExtractor): @@ -82,3 +83,34 @@ class SpiegelIE(InfoExtractor): 'duration': duration, 'formats': formats, } + + +class SpiegelArticleIE(InfoExtractor): + _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P[0-9]+)\.html' + IE_NAME = 'Spiegel:Article' + IE_DESC = 'Articles on spiegel.de' + _TEST = { + 'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html', + 'info_dict': { + 'id': '1516455', + 'ext': 'mp4', + 'title': 'Faszination Badminton: Nennt es bloß nicht Federball', + 'description': 're:^Patrick Kämnitz gehört.{100,}', + }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + webpage = self._download_webpage(url, video_id) + video_link = self._search_regex( + r'>= 8 - signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes) - if signature[0:2] != b('\x00\x01'): return False - signature = signature[2:] - if not b('\x00') in signature: return False - signature = signature[signature.index(b('\x00'))+1:] - if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False - signature = signature[19:] - if signature != sha256(message).digest(): return False - return True - -sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n') -sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n') -sys.stderr.write(u'From now on, get the binaries from http://rg3.github.io/youtube-dl/download.html, not from the git repository.\n\n') - -try: - raw_input() -except NameError: # Python 3 - input() - -filename = sys.argv[0] - -UPDATE_URL = "http://rg3.github.io/youtube-dl/update/" -VERSION_URL = UPDATE_URL + 'LATEST_VERSION' -JSON_URL = UPDATE_URL + 'versions.json' -UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) - -if not os.access(filename, os.W_OK): - sys.exit('ERROR: no write permissions on %s' % filename) - -try: - versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8') - versions_info = json.loads(versions_info) -except: - sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.') -if not 'signature' in versions_info: - sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.') -signature = versions_info['signature'] -del versions_info['signature'] -if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY): - sys.exit(u'ERROR: the versions file signature is invalid. Aborting.') - -version = versions_info['versions'][versions_info['latest']] - -try: - urlh = compat_urllib_request.urlopen(version['bin'][0]) - newcontent = urlh.read() - urlh.close() -except (IOError, OSError) as err: - sys.exit('ERROR: unable to download latest version') - -newcontent_hash = hashlib.sha256(newcontent).hexdigest() -if newcontent_hash != version['bin'][1]: - sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.') - -try: - with open(filename, 'wb') as outf: - outf.write(newcontent) -except (IOError, OSError) as err: - sys.exit('ERROR: unable to overwrite current version') - -sys.stderr.write(u'Done! Now you can run youtube-dl.\n') diff --git a/youtube-dl.exe b/youtube-dl.exe deleted file mode 100644 index 45eee04bb..000000000 Binary files a/youtube-dl.exe and /dev/null differ -- cgit v1.2.3 From 3d9fae1ed2da722faed44d4f89143f05797ab4d9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 13 Sep 2014 07:07:39 +0200 Subject: Add support for PornoXO --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/pornoxo.py | 66 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 youtube_dl/extractor/pornoxo.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f1a5b2608..1bf5c51b4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -267,6 +267,7 @@ from .podomatic import PodomaticIE from .pornhd import PornHdIE from .pornhub import PornHubIE from .pornotube import PornotubeIE +from .pornoxo import PornoXOIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py new file mode 100644 index 000000000..0d9656d39 --- /dev/null +++ b/youtube_dl/extractor/pornoxo.py @@ -0,0 +1,66 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + str_to_int, +) + +class PornoXOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P\d+)/(?P[^/]+)\.html' + _TEST = { + 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', + 'md5': '582f28ecbaa9e6e24cb90f50f524ce87', + 'info_dict': { + 'id': '7564', + 'ext': 'flv', + 'title': 'Striptease From Sexy Secretary!', + 'description': 'Striptease From Sexy Secretary!', + 'categories': list, # NSFW + 'thumbnail': 're:https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url') + + title = self._html_search_regex( + r'([^<]+)\s*-\s*PornoXO', webpage, 'title') + + description = self._html_search_regex( + r'<meta name="description" content="([^"]+)\s*featuring', + webpage, 'description', fatal=False) + + thumbnail = self._html_search_regex( + r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) + + view_count = str_to_int(self._html_search_regex( + r'Views:\s*(\d+)', webpage, 'view count', fatal=False)) + + categories_str = self._html_search_regex( + r'<meta name="description" content=".*featuring\s*([^"]+)"', + webpage, 'categories', fatal=False) + categories = ( + None if categories_str is None + else categories_str.split(',')) + + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'categories': categories, + 'view_count': view_count, + 'age_limit': 18, + } -- cgit v1.2.3 From f4a3490cbc8a6cae0a10b4ca68c2a40a41dddf83 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:09:39 +0200 Subject: [pornoxo] Fix test (view count) --- youtube_dl/extractor/pornoxo.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py index 0d9656d39..202f58673 100644 --- a/youtube_dl/extractor/pornoxo.py +++ b/youtube_dl/extractor/pornoxo.py @@ -4,10 +4,10 @@ import re from .common import InfoExtractor from ..utils import ( - parse_duration, str_to_int, ) + class PornoXOIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' _TEST = { @@ -44,7 +44,7 @@ class PornoXOIE(InfoExtractor): r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) view_count = str_to_int(self._html_search_regex( - r'Views:\s*(\d+)', webpage, 'view count', fatal=False)) + r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False)) categories_str = self._html_search_regex( r'<meta name="description" content=".*featuring\s*([^"]+)"', @@ -53,7 +53,6 @@ class PornoXOIE(InfoExtractor): None if categories_str is None else categories_str.split(',')) - return { 'id': video_id, 'url': video_url, -- cgit v1.2.3 From 81127aa55dc9eb4e971d841573e1a1661c9c18b6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:19:20 +0200 Subject: [youtube] Move playlist test to extractor --- youtube_dl/extractor/youtube.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e28db2b5a..10d7baef6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1020,6 +1020,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): _MORE_PAGES_INDICATOR = r'data-link-type="next"' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' IE_NAME = u'youtube:playlist' + _TESTS = [{ + 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', + 'info_dict': { + 'title': 'ytdl test PL', + }, + 'playlist_count': 3, + }] def _real_initialize(self): self._login() @@ -1118,6 +1125,7 @@ class YoutubeTopListIE(YoutubePlaylistIE): IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"' u' (Example: "yttoplist:music:Top Tracks")') _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' + _TESTS = [] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -1447,6 +1455,7 @@ class YoutubeSubscriptionsIE(YoutubePlaylistIE): IE_NAME = u'youtube:subscriptions' IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' + _TESTS = [] def _real_extract(self, url): title = u'Youtube Subscriptions' -- cgit v1.2.3 From 9291475f7da52bc7c7a7e6dada7e311197d3c7be Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:31:48 +0200 Subject: [youtube] Move more tests to Playlist IE, and simply suitable. --- test/test_youtube_lists.py | 46 +---------------------------------------- youtube_dl/extractor/youtube.py | 41 +++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 52 deletions(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 3aadedd64..1fa99f88b 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -25,15 +25,6 @@ class TestYoutubeLists(unittest.TestCase): """Make sure the info has '_type' set to 'playlist'""" self.assertEqual(info['_type'], 'playlist') - def test_youtube_playlist(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'ytdl test PL') - ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] - self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) - def test_youtube_playlist_noplaylist(self): dl = FakeYDL() dl.params['noplaylist'] = True @@ -41,36 +32,7 @@ class TestYoutubeLists(unittest.TestCase): result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') self.assertEqual(result['_type'], 'url') self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg') - - def test_issue_673(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('PLBB231211A4F62143') - self.assertTrue(len(result['entries']) > 25) - - def test_youtube_playlist_long(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') - self.assertIsPlaylist(result) - self.assertTrue(len(result['entries']) >= 799) - - def test_youtube_playlist_with_deleted(self): - #651 - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') - ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] - self.assertFalse('pElCt5oNDuI' in ytie_results) - self.assertFalse('KdPEApIVdWM' in ytie_results) - - def test_youtube_playlist_empty(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx') - self.assertIsPlaylist(result) - self.assertEqual(len(result['entries']), 0) - + def test_youtube_course(self): dl = FakeYDL() ie = YoutubePlaylistIE(dl) @@ -97,12 +59,6 @@ class TestYoutubeLists(unittest.TestCase): result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') self.assertTrue(len(result['entries']) >= 320) - def test_youtube_safe_search(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl') - self.assertEqual(len(result['entries']), 2) - def test_youtube_show(self): dl = FakeYDL() ie = YoutubeShowIE(dl) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 10d7baef6..47ff53a35 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -221,6 +221,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): ) )? # all until now is optional -> you can pass the naked ID ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID + (?!.*?&list=) # combined list/video URLs are handled by the playlist IE (?(1).+)? # if we found the ID, everything can follow $""" _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' @@ -387,13 +388,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): }, ] - - @classmethod - def suitable(cls, url): - """Receives a URL and returns True if suitable for this IE.""" - if YoutubePlaylistIE.suitable(url): return False - return re.match(cls._VALID_URL, url) is not None - def __init__(self, *args, **kwargs): super(YoutubeIE, self).__init__(*args, **kwargs) self._player_cache = {} @@ -1026,6 +1020,39 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'title': 'ytdl test PL', }, 'playlist_count': 3, + }, { + 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx', + 'info_dict': { + 'title': 'YDL_Empty_List', + }, + 'playlist_count': 0, + }, { + 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', + 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', + 'info_dict': { + 'title': '29C3: Not my department', + }, + 'playlist_count': 95, + }, { + 'note': 'issue #673', + 'url': 'PLBB231211A4F62143', + 'info_dict': { + 'title': 'Team Fortress 2 (Class-based LP)', + }, + 'playlist_mincount': 26, + }, { + 'note': 'Large playlist', + 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', + 'info_dict': { + 'title': 'Uploads from Cauchemar', + }, + 'playlist_mincount': 799, + }, { + 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', + 'info_dict': { + 'title': 'YDL_safe_search', + }, + 'playlist_count': 2, }] def _real_initialize(self): -- cgit v1.2.3 From 5309602106a0365745de6d5f5c7cd248b54d8d43 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:35:27 +0200 Subject: [prosiebensat1] Fix bitrate calculation --- youtube_dl/extractor/prosiebensat1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index da64a1a7b..421998e91 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -249,7 +249,7 @@ class ProSiebenSat1IE(InfoExtractor): urls_sources = urls_sources.values() def fix_bitrate(bitrate): - return bitrate / 1000 if bitrate % 1000 == 0 else bitrate + return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate for source in urls_sources: protocol = source['protocol'] -- cgit v1.2.3 From 6f6ed04172af7caf2a7dcef706a4f17d11872c90 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:39:26 +0200 Subject: [prosiebensat1] Make upload_date optional The site has a crazy incorrect date in German formatting, but the correct date is nowhere to be found. --- youtube_dl/extractor/prosiebensat1.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 421998e91..5b2a723c1 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -145,7 +145,6 @@ class ProSiebenSat1IE(InfoExtractor): 'ext': 'mp4', 'title': 'Kurztrips zum Valentinstag', 'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528', - 'upload_date': '20130206', 'duration': 307.24, }, 'params': { @@ -240,7 +239,7 @@ class ProSiebenSat1IE(InfoExtractor): thumbnail = self._og_search_thumbnail(page) upload_date = unified_strdate(self._html_search_regex( - self._UPLOAD_DATE_REGEXES, page, 'upload date', fatal=False)) + self._UPLOAD_DATE_REGEXES, page, 'upload date', default=None)) formats = [] -- cgit v1.2.3 From c3c3fe476888898e5f70da657827f2d958129b03 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:41:26 +0200 Subject: [swrmediathek] Remove deleted video test case --- youtube_dl/extractor/swrmediathek.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py index 5d9d70367..13c6ea677 100644 --- a/youtube_dl/extractor/swrmediathek.py +++ b/youtube_dl/extractor/swrmediathek.py @@ -52,20 +52,6 @@ class SWRMediathekIE(InfoExtractor): 'uploader': 'SWR 2', 'uploader_id': '284670', } - }, { - 'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6', - 'md5': '881531487d0633080a8cc88d31ef896f', - 'info_dict': { - 'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6', - 'ext': 'mp4', - 'title': 'Familienspaß am Bodensee', - 'description': 'md5:0b591225a32cfde7be1629ed49fe4315', - 'thumbnail': 're:http://.*\.jpg', - 'duration': 1784, - 'upload_date': '20140727', - 'uploader': 'SWR Fernsehen BW', - 'uploader_id': '281130', - } }] def _real_extract(self, url): -- cgit v1.2.3 From 1ad6efd9b64d41e2b2fcce02749970a21a25cbeb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:46:19 +0200 Subject: [spiegel] Remove deleted video from test cases --- youtube_dl/extractor/spiegel.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index a3b89f751..9ed7d3b39 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -29,16 +29,6 @@ class SpiegelIE(InfoExtractor): 'description': 'md5:c2322b65e58f385a820c10fa03b2d088', 'duration': 983, }, - }, { - 'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html', - 'md5': '54f58ba0e752e3c07bc2a26222dd0acf', - 'info_dict': { - 'id': '1502367', - 'ext': 'mp4', - 'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern', - 'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91', - 'duration': 42, - }, }] def _real_extract(self, url): -- cgit v1.2.3 From 67b7e712d8ef5d67b6aa6d382ae43a131ec1f822 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:47:07 +0200 Subject: [xhamster] Remove md5sums from tests We're now checking that the file is sufficiently large by default, and they seem to reencode the videos from time to time. --- youtube_dl/extractor/xhamster.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 00b6d1eba..4e8fbde8d 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -18,7 +18,6 @@ class XHamsterIE(InfoExtractor): _TESTS = [ { 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', - 'md5': '8281348b8d3c53d39fffb377d24eac4e', 'info_dict': { 'id': '1509445', 'ext': 'mp4', @@ -31,7 +30,6 @@ class XHamsterIE(InfoExtractor): }, { 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', - 'md5': '4cbd8d56708ecb4fb4124c23e4acb81a', 'info_dict': { 'id': '2221348', 'ext': 'mp4', -- cgit v1.2.3 From 310d75d141ad51a907a8e06214302409cef18a02 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:49:26 +0200 Subject: [youtube] Correct description in test case --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 47ff53a35..150778592 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -360,7 +360,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"info_dict": { u"upload_date": "20121002", u"uploader_id": "8KVIDEO", - u"description": "No description available.", + u"description": '', u"uploader": "8KVIDEO", u"title": "UHDTV TEST 8K VIDEO.mp4" }, -- cgit v1.2.3 From 78caa52aea356a60b6efbe92484d6bdea1fe7432 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:51:06 +0200 Subject: [youtube] Modernize --- youtube_dl/extractor/youtube.py | 251 ++++++++++++++++++++-------------------- 1 file changed, 127 insertions(+), 124 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 150778592..b54c69122 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1,5 +1,8 @@ # coding: utf-8 +from __future__ import unicode_literals + + import itertools import json import os.path @@ -69,29 +72,29 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', - login_page, u'Login GALX parameter') + login_page, 'Login GALX parameter') # Log in login_form_strs = { - u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', - u'Email': username, - u'GALX': galx, - u'Passwd': password, - - u'PersistentCookie': u'yes', - u'_utf8': u'霱', - u'bgresponse': u'js_disabled', - u'checkConnection': u'', - u'checkedDomains': u'youtube', - u'dnConn': u'', - u'pstMsg': u'0', - u'rmShown': u'1', - u'secTok': u'', - u'signIn': u'Sign in', - u'timeStmp': u'', - u'service': u'youtube', - u'uilel': u'3', - u'hl': u'en_US', + 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', + 'Email': username, + 'GALX': galx, + 'Passwd': password, + + 'PersistentCookie': 'yes', + '_utf8': '霱', + 'bgresponse': 'js_disabled', + 'checkConnection': '', + 'checkedDomains': 'youtube', + 'dnConn': '', + 'pstMsg': '0', + 'rmShown': '1', + 'secTok': '', + 'signIn': 'Sign in', + 'timeStmp': '', + 'service': 'youtube', + 'uilel': '3', + 'hl': 'en_US', } # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode @@ -132,19 +135,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor): timeStmp = match.group(1) tfa_form_strs = { - u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', - u'smsToken': u'', - u'smsUserPin': tfa_code, - u'smsVerifyPin': u'Verify', - - u'PersistentCookie': u'yes', - u'checkConnection': u'', - u'checkedDomains': u'youtube', - u'pstMsg': u'1', - u'secTok': secTok, - u'timeStmp': timeStmp, - u'service': u'youtube', - u'hl': u'en_US', + 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', + 'smsToken': '', + 'smsUserPin': tfa_code, + 'smsVerifyPin': 'Verify', + + 'PersistentCookie': 'yes', + 'checkConnection': '', + 'checkedDomains': 'youtube', + 'pstMsg': '1', + 'secTok': secTok, + 'timeStmp': timeStmp, + 'service': 'youtube', + 'hl': 'en_US', } tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items()) tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii') @@ -196,7 +199,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): - IE_DESC = u'YouTube.com' + IE_DESC = 'YouTube.com' _VALID_URL = r"""(?x)^ ( (?:https?://|//) # http(s):// or protocol-independent URL @@ -301,7 +304,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '_rtmp': {'protocol': 'rtmp'}, } - IE_NAME = u'youtube' + IE_NAME = 'youtube' _TESTS = [ { u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc", @@ -371,19 +374,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): }, # DASH manifest with encrypted signature { - u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA', - u'info_dict': { - u'id': u'IB3lcPjvWLA', - u'ext': u'm4a', - u'title': u'Afrojack - The Spark ft. Spree Wilson', - u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8', - u'uploader': u'AfrojackVEVO', - u'uploader_id': u'AfrojackVEVO', - u'upload_date': u'20131011', + 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA', + 'info_dict': { + 'id': 'IB3lcPjvWLA', + 'ext': 'm4a', + 'title': 'Afrojack - The Spark ft. Spree Wilson', + 'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8', + 'uploader': 'AfrojackVEVO', + 'uploader_id': 'AfrojackVEVO', + 'upload_date': '20131011', }, u"params": { - u'youtube_include_dash_manifest': True, - u'format': '141', + 'youtube_include_dash_manifest': True, + 'format': '141', }, }, ] @@ -410,7 +413,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _signature_cache_id(self, example_sig): """ Return a string representation of a signature """ - return u'.'.join(compat_str(len(part)) for part in example_sig.split('.')) + return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) def _extract_signature_function(self, video_id, player_url, example_sig): id_m = re.match( @@ -428,7 +431,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id) if cache_spec is not None: - return lambda s: u''.join(s[i] for i in cache_spec) + return lambda s: ''.join(s[i] for i in cache_spec) if player_type == 'js': code = self._download_webpage( @@ -447,7 +450,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): assert False, 'Invalid player type %r' % player_type if cache_spec is None: - test_string = u''.join(map(compat_chr, range(len(example_sig)))) + test_string = ''.join(map(compat_chr, range(len(example_sig)))) cache_res = res(test_string) cache_spec = [ord(c) for c in cache_res] @@ -457,10 +460,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _print_sig_code(self, func, example_sig): def gen_sig_code(idxs): def _genslice(start, end, step): - starts = u'' if start == 0 else str(start) - ends = (u':%d' % (end+step)) if end + step >= 0 else u':' - steps = u'' if step == 1 else (u':%d' % step) - return u's[%s%s%s]' % (starts, ends, steps) + starts = '' if start == 0 else str(start) + ends = (u':%d' % (end+step)) if end + step >= 0 else ':' + steps = '' if step == 1 else (u':%d' % step) + return 's[%s%s%s]' % (starts, ends, steps) step = None start = '(Never used)' # Quelch pyflakes warnings - start will be @@ -477,26 +480,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): start = prev continue else: - yield u's[%d]' % prev + yield 's[%d]' % prev if step is None: - yield u's[%d]' % i + yield 's[%d]' % i else: yield _genslice(start, i, step) - test_string = u''.join(map(compat_chr, range(len(example_sig)))) + test_string = ''.join(map(compat_chr, range(len(example_sig)))) cache_res = func(test_string) cache_spec = [ord(c) for c in cache_res] - expr_code = u' + '.join(gen_sig_code(cache_spec)) + expr_code = ' + '.join(gen_sig_code(cache_spec)) signature_id_tuple = '(%s)' % ( ', '.join(compat_str(len(p)) for p in example_sig.split('.'))) code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n' - u' return %s\n') % (signature_id_tuple, expr_code) + ' return %s\n') % (signature_id_tuple, expr_code) self.to_screen(u'Extracted signature function:\n' + code) def _parse_sig_js(self, jscode): funcname = self._search_regex( r'signature=([$a-zA-Z]+)', jscode, - u'Initial JS player signature function name') + 'Initial JS player signature function name') jsi = JSInterpreter(jscode) initial_function = jsi.extract_function(funcname) @@ -504,9 +507,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _parse_sig_swf(self, file_contents): swfi = SWFInterpreter(file_contents) - TARGET_CLASSNAME = u'SignatureDecipher' + TARGET_CLASSNAME = 'SignatureDecipher' searched_class = swfi.extract_class(TARGET_CLASSNAME) - initial_function = swfi.extract_function(searched_class, u'decipher') + initial_function = swfi.extract_function(searched_class, 'decipher') return lambda s: initial_function([s]) def _decrypt_signature(self, s, video_id, player_url, age_gate=False): @@ -516,7 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): raise ExtractorError(u'Cannot decrypt signature without player_url') if player_url.startswith(u'//'): - player_url = u'https:' + player_url + player_url = 'https:' + player_url try: player_id = (player_url, self._signature_cache_id(s)) if player_id not in self._player_cache: @@ -531,7 +534,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): except Exception as e: tb = traceback.format_exc() raise ExtractorError( - u'Signature extraction failed: ' + tb, cause=e) + 'Signature extraction failed: ' + tb, cause=e) def _get_available_subtitles(self, video_id, webpage): try: @@ -554,7 +557,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'fmt': self._downloader.params.get('subtitlesformat', 'srt'), 'name': unescapeHTML(l[0]).encode('utf-8'), }) - url = u'https://www.youtube.com/api/timedtext?' + params + url = 'https://www.youtube.com/api/timedtext?' + params sub_lang_list[lang] = url if not sub_lang_list: self._downloader.report_warning(u'video doesn\'t have subtitles') @@ -567,7 +570,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): sub_format = self._downloader.params.get('subtitlesformat', 'srt') self.to_screen(u'%s: Looking for automatic captions' % video_id) mobj = re.search(r';ytplayer.config = ({.*?});', webpage) - err_msg = u'Couldn\'t find automatic captions for %s' % video_id + err_msg = 'Couldn\'t find automatic captions for %s' % video_id if mobj is None: self._downloader.report_warning(err_msg) return {} @@ -623,7 +626,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): urls = filter(lambda l: l and not l.startswith('#'), lines) return urls - manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest') + manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest') formats_urls = _get_urls(manifest) for format_url in formats_urls: itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag') @@ -636,8 +639,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _real_extract(self, url): proto = ( - u'http' if self._downloader.params.get('prefer_insecure', False) - else u'https') + 'http' if self._downloader.params.get('prefer_insecure', False) + else 'https') # Extract original video URL from URL with redirection, like age verification, using next_url parameter mobj = re.search(self._NEXT_URL_RE, url) @@ -688,11 +691,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if 'token' not in video_info: if 'reason' in video_info: raise ExtractorError( - u'YouTube said: %s' % video_info['reason'][0], + 'YouTube said: %s' % video_info['reason'][0], expected=True, video_id=video_id) else: raise ExtractorError( - u'"token" parameter not in video info for unknown reason', + '"token" parameter not in video info for unknown reason', video_id=video_id) if 'view_count' in video_info: @@ -725,7 +728,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): video_title = video_info['title'][0] else: self._downloader.report_warning(u'Unable to extract video title') - video_title = u'_' + video_title = '_' # thumbnail image # We try first to get a high quality image: @@ -779,7 +782,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if fd_mobj: video_description = unescapeHTML(fd_mobj.group(1)) else: - video_description = u'' + video_description = '' def _extract_count(count_name): count = self._search_regex( @@ -826,7 +829,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if m_s is not None: self.to_screen(u'%s: Encrypted signatures detected.' % video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] - m_s = re_signature.search(args.get('adaptive_fmts', u'')) + m_s = re_signature.search(args.get('adaptive_fmts', '')) if m_s is not None: if 'adaptive_fmts' in video_info: video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts'] @@ -876,12 +879,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if not age_gate: jsplayer_url_json = self._search_regex( r'"assets":.+?"js":\s*("[^"]+")', - video_webpage, u'JS player URL') + video_webpage, 'JS player URL') player_url = json.loads(jsplayer_url_json) if player_url is None: player_url_json = self._search_regex( r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', - video_webpage, u'age gate player URL') + video_webpage, 'age gate player URL') player_url = json.loads(player_url_json) if self._downloader.params.get('verbose'): @@ -892,14 +895,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if player_url.endswith('swf'): player_version = self._search_regex( r'-(.+?)(?:/watch_as3)?\.swf$', player_url, - u'flash player', fatal=False) + 'flash player', fatal=False) player_desc = 'flash player %s' % player_version else: player_version = self._search_regex( r'html5player-([^/]+?)(?:/html5player)?\.js', player_url, 'html5 player', fatal=False) - player_desc = u'html5 player %s' % player_version + player_desc = 'html5 player %s' % player_version parts_sizes = self._signature_cache_id(encrypted_sig) self.to_screen(u'{%s} signature length %s, %s' % @@ -991,7 +994,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): } class YoutubePlaylistIE(YoutubeBaseInfoExtractor): - IE_DESC = u'YouTube.com playlists' + IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: (?:https?://)? (?:\w+\.)? @@ -1013,7 +1016,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' _MORE_PAGES_INDICATOR = r'data-link-type="next"' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' - IE_NAME = u'youtube:playlist' + IE_NAME = 'youtube:playlist' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', 'info_dict': { @@ -1068,7 +1071,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): # the id of the playlist is just 'RD' + video_id url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) webpage = self._download_webpage( - url, playlist_id, u'Downloading Youtube mix') + url, playlist_id, 'Downloading Youtube mix') search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) title_span = ( search_title('playlist-title') or @@ -1105,7 +1108,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self._extract_mix(playlist_id) if playlist_id.startswith('TL'): raise ExtractorError(u'For downloading YouTube.com top lists, use ' - u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) + 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) @@ -1114,7 +1117,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): # Check if the playlist exists or is private if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None: raise ExtractorError( - u'The playlist doesn\'t exist or is private, use --username or ' + 'The playlist doesn\'t exist or is private, use --username or ' '--netrc to access it.', expected=True) @@ -1141,16 +1144,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): playlist_title = self._html_search_regex( r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>', - page, u'title') + page, 'title') url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, playlist_title) class YoutubeTopListIE(YoutubePlaylistIE): - IE_NAME = u'youtube:toplist' + IE_NAME = 'youtube:toplist' IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"' - u' (Example: "yttoplist:music:Top Tracks")') + ' (Example: "yttoplist:music:Top Tracks")') _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' _TESTS = [] @@ -1161,7 +1164,7 @@ class YoutubeTopListIE(YoutubePlaylistIE): query = compat_urllib_parse.urlencode({'title': title}) playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query) channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) - link = self._html_search_regex(playlist_re, channel_page, u'list') + link = self._html_search_regex(playlist_re, channel_page, 'list') url = compat_urlparse.urljoin('https://www.youtube.com/', link) video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' @@ -1169,7 +1172,7 @@ class YoutubeTopListIE(YoutubePlaylistIE): # sometimes the webpage doesn't contain the videos # retry until we get them for i in itertools.count(0): - msg = u'Downloading Youtube mix' + msg = 'Downloading Youtube mix' if i > 0: msg += ', retry #%d' % i @@ -1182,11 +1185,11 @@ class YoutubeTopListIE(YoutubePlaylistIE): class YoutubeChannelIE(InfoExtractor): - IE_DESC = u'YouTube.com channels' + IE_DESC = 'YouTube.com channels' _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" _MORE_PAGES_INDICATOR = 'yt-uix-load-more' _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' - IE_NAME = u'youtube:channel' + IE_NAME = 'youtube:channel' def extract_videos_from_page(self, page): ids_in_page = [] @@ -1238,12 +1241,12 @@ class YoutubeChannelIE(InfoExtractor): class YoutubeUserIE(InfoExtractor): - IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' + IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' - IE_NAME = u'youtube:user' + IE_NAME = 'youtube:user' @classmethod def suitable(cls, url): @@ -1272,7 +1275,7 @@ class YoutubeUserIE(InfoExtractor): gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index) page = self._download_webpage( gdata_url, username, - u'Downloading video ids from %d to %d' % ( + 'Downloading video ids from %d to %d' % ( start_index, start_index + self._GDATA_PAGE_SIZE)) try: @@ -1300,10 +1303,10 @@ class YoutubeUserIE(InfoExtractor): class YoutubeSearchIE(SearchInfoExtractor): - IE_DESC = u'YouTube.com searches' - _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' + IE_DESC = 'YouTube.com searches' + _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' _MAX_RESULTS = 1000 - IE_NAME = u'youtube:search' + IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' def _get_n_results(self, query, n): @@ -1327,7 +1330,7 @@ class YoutubeSearchIE(SearchInfoExtractor): if 'items' not in api_response: raise ExtractorError( - u'[youtube] No video results', expected=True) + '[youtube] No video results', expected=True) new_ids = list(video['id'] for video in api_response['items']) video_ids += new_ids @@ -1346,12 +1349,12 @@ class YoutubeSearchDateIE(YoutubeSearchIE): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' _SEARCH_KEY = 'ytsearchdate' - IE_DESC = u'YouTube.com searches, newest videos first' + IE_DESC = 'YouTube.com searches, newest videos first' class YoutubeSearchURLIE(InfoExtractor): - IE_DESC = u'YouTube.com search URLs' - IE_NAME = u'youtube:search_url' + IE_DESC = 'YouTube.com search URLs' + IE_NAME = 'youtube:search_url' _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' def _real_extract(self, url): @@ -1360,7 +1363,7 @@ class YoutubeSearchURLIE(InfoExtractor): webpage = self._download_webpage(url, query) result_code = self._search_regex( - r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML') + r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML') part_codes = re.findall( r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code) @@ -1386,14 +1389,14 @@ class YoutubeSearchURLIE(InfoExtractor): class YoutubeShowIE(InfoExtractor): - IE_DESC = u'YouTube.com (multi-season) shows' + IE_DESC = 'YouTube.com (multi-season) shows' _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' - IE_NAME = u'youtube:show' + IE_NAME = 'youtube:show' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) show_name = mobj.group(1) - webpage = self._download_webpage(url, show_name, u'Downloading show webpage') + webpage = self._download_webpage(url, show_name, 'Downloading show webpage') # There's one playlist for each season of the show m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) @@ -1419,7 +1422,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): @property def IE_NAME(self): - return u'youtube:%s' % self._FEED_NAME + return 'youtube:%s' % self._FEED_NAME def _real_initialize(self): self._login() @@ -1429,8 +1432,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): paging = 0 for i in itertools.count(1): info = self._download_json(self._FEED_TEMPLATE % paging, - u'%s feed' % self._FEED_NAME, - u'Downloading page %s' % i) + '%s feed' % self._FEED_NAME, + 'Downloading page %s' % i) feed_html = info.get('feed_html') or info.get('content_html') load_more_widget_html = info.get('load_more_widget_html') or feed_html m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) @@ -1447,45 +1450,45 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): - IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)' + IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' _FEED_NAME = 'recommended' - _PLAYLIST_TITLE = u'Youtube Recommended videos' + _PLAYLIST_TITLE = 'Youtube Recommended videos' class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): - IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)' + IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater' _FEED_NAME = 'watch_later' - _PLAYLIST_TITLE = u'Youtube Watch Later' + _PLAYLIST_TITLE = 'Youtube Watch Later' _PERSONAL_FEED = True class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): - IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)' - _VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory' + IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)' + _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' _FEED_NAME = 'history' _PERSONAL_FEED = True - _PLAYLIST_TITLE = u'Youtube Watch History' + _PLAYLIST_TITLE = 'Youtube Watch History' class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): - IE_NAME = u'youtube:favorites' - IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)' + IE_NAME = 'youtube:favorites' + IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?' _LOGIN_REQUIRED = True def _real_extract(self, url): webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos') - playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id') + playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id') return self.url_result(playlist_id, 'YoutubePlaylist') class YoutubeSubscriptionsIE(YoutubePlaylistIE): - IE_NAME = u'youtube:subscriptions' - IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' + IE_NAME = 'youtube:subscriptions' + IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' _TESTS = [] def _real_extract(self, url): - title = u'Youtube Subscriptions' + title = 'Youtube Subscriptions' page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title) # The extraction process is the same as for playlists, but the regex @@ -1537,9 +1540,9 @@ class YoutubeTruncatedURLIE(InfoExtractor): def _real_extract(self, url): raise ExtractorError( - u'Did you forget to quote the URL? Remember that & is a meta ' - u'character in most shells, so you want to put the URL in quotes, ' - u'like youtube-dl ' - u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' - u' or simply youtube-dl BaW_jenozKc .', + 'Did you forget to quote the URL? Remember that & is a meta ' + 'character in most shells, so you want to put the URL in quotes, ' + 'like youtube-dl ' + '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' + ' or simply youtube-dl BaW_jenozKc .', expected=True) -- cgit v1.2.3 From 34a741a8eccb5d4b61ae8f7c840d8475f9f3871d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:55:21 +0200 Subject: [options] Move all options to their own file --- youtube_dl/__init__.py | 474 +----------------------------------------------- youtube_dl/options.py | 479 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 480 insertions(+), 473 deletions(-) create mode 100644 youtube_dl/options.py diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 8f1a1a232..cc0a4071f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -81,10 +81,8 @@ __license__ = 'Public Domain' import codecs import io -import optparse import os import random -import shlex import sys @@ -94,9 +92,9 @@ from .utils import ( DateRange, DEFAULT_OUTTMPL, decodeOption, - get_term_width, DownloadError, MaxDownloadsReached, + parseOpts, preferredencoding, read_batch_urls, SameFileError, @@ -109,7 +107,6 @@ from .downloader import ( FileDownloader, ) from .extractor import gen_extractors -from .version import __version__ from .YoutubeDL import YoutubeDL from .postprocessor import ( AtomicParsleyPP, @@ -123,475 +120,6 @@ from .postprocessor import ( ) -def parseOpts(overrideArguments=None): - def _readOptions(filename_bytes, default=[]): - try: - optionf = open(filename_bytes) - except IOError: - return default # silently skip if file is not present - try: - res = [] - for l in optionf: - res += shlex.split(l, comments=True) - finally: - optionf.close() - return res - - def _readUserConf(): - xdg_config_home = os.environ.get('XDG_CONFIG_HOME') - if xdg_config_home: - userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') - if not os.path.isfile(userConfFile): - userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') - else: - userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') - if not os.path.isfile(userConfFile): - userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') - userConf = _readOptions(userConfFile, None) - - if userConf is None: - appdata_dir = os.environ.get('appdata') - if appdata_dir: - userConf = _readOptions( - os.path.join(appdata_dir, 'youtube-dl', 'config'), - default=None) - if userConf is None: - userConf = _readOptions( - os.path.join(appdata_dir, 'youtube-dl', 'config.txt'), - default=None) - - if userConf is None: - userConf = _readOptions( - os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), - default=None) - if userConf is None: - userConf = _readOptions( - os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), - default=None) - - if userConf is None: - userConf = [] - - return userConf - - def _format_option_string(option): - ''' ('-o', '--option') -> -o, --format METAVAR''' - - opts = [] - - if option._short_opts: - opts.append(option._short_opts[0]) - if option._long_opts: - opts.append(option._long_opts[0]) - if len(opts) > 1: - opts.insert(1, ', ') - - if option.takes_value(): opts.append(' %s' % option.metavar) - - return "".join(opts) - - def _comma_separated_values_options_callback(option, opt_str, value, parser): - setattr(parser.values, option.dest, value.split(',')) - - def _hide_login_info(opts): - opts = list(opts) - for private_opt in ['-p', '--password', '-u', '--username', '--video-password']: - try: - i = opts.index(private_opt) - opts[i+1] = '<PRIVATE>' - except ValueError: - pass - return opts - - max_width = 80 - max_help_position = 80 - - # No need to wrap help messages if we're on a wide console - columns = get_term_width() - if columns: max_width = columns - - fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) - fmt.format_option_strings = _format_option_string - - kw = { - 'version' : __version__, - 'formatter' : fmt, - 'usage' : '%prog [options] url [url...]', - 'conflict_handler' : 'resolve', - } - - parser = optparse.OptionParser(**kw) - - # option groups - general = optparse.OptionGroup(parser, 'General Options') - selection = optparse.OptionGroup(parser, 'Video Selection') - authentication = optparse.OptionGroup(parser, 'Authentication Options') - video_format = optparse.OptionGroup(parser, 'Video Format Options') - subtitles = optparse.OptionGroup(parser, 'Subtitle Options') - downloader = optparse.OptionGroup(parser, 'Download Options') - postproc = optparse.OptionGroup(parser, 'Post-processing Options') - filesystem = optparse.OptionGroup(parser, 'Filesystem Options') - workarounds = optparse.OptionGroup(parser, 'Workarounds') - verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') - - general.add_option('-h', '--help', - action='help', help='print this help text and exit') - general.add_option('-v', '--version', - action='version', help='print program version and exit') - general.add_option('-U', '--update', - action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') - general.add_option('-i', '--ignore-errors', - action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False) - general.add_option('--abort-on-error', - action='store_false', dest='ignoreerrors', - help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') - general.add_option('--dump-user-agent', - action='store_true', dest='dump_user_agent', - help='display the current browser identification', default=False) - general.add_option('--list-extractors', - action='store_true', dest='list_extractors', - help='List all supported extractors and the URLs they would handle', default=False) - general.add_option('--extractor-descriptions', - action='store_true', dest='list_extractor_descriptions', - help='Output descriptions of all supported extractors', default=False) - general.add_option( - '--proxy', dest='proxy', default=None, metavar='URL', - help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') - general.add_option( - '--socket-timeout', dest='socket_timeout', - type=float, default=None, help=u'Time to wait before giving up, in seconds') - general.add_option( - '--default-search', - dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') - general.add_option( - '--ignore-config', - action='store_true', - help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') - - selection.add_option( - '--playlist-start', - dest='playliststart', metavar='NUMBER', default=1, type=int, - help='playlist video to start at (default is %default)') - selection.add_option( - '--playlist-end', - dest='playlistend', metavar='NUMBER', default=None, type=int, - help='playlist video to end at (default is last)') - selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') - selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') - selection.add_option('--max-downloads', metavar='NUMBER', - dest='max_downloads', type=int, default=None, - help='Abort after downloading NUMBER files') - selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) - selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) - selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) - selection.add_option( - '--datebefore', metavar='DATE', dest='datebefore', default=None, - help='download only videos uploaded on or before this date (i.e. inclusive)') - selection.add_option( - '--dateafter', metavar='DATE', dest='dateafter', default=None, - help='download only videos uploaded on or after this date (i.e. inclusive)') - selection.add_option( - '--min-views', metavar='COUNT', dest='min_views', - default=None, type=int, - help="Do not download any videos with less than COUNT views",) - selection.add_option( - '--max-views', metavar='COUNT', dest='max_views', - default=None, type=int, - help="Do not download any videos with more than COUNT views",) - selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) - selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', - help='download only videos suitable for the given age', - default=None, type=int) - selection.add_option('--download-archive', metavar='FILE', - dest='download_archive', - help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.') - selection.add_option( - '--include-ads', dest='include_ads', - action='store_true', - help='Download advertisements as well (experimental)') - selection.add_option( - '--youtube-include-dash-manifest', action='store_true', - dest='youtube_include_dash_manifest', default=False, - help='Try to download the DASH manifest on YouTube videos (experimental)') - - authentication.add_option('-u', '--username', - dest='username', metavar='USERNAME', help='account username') - authentication.add_option('-p', '--password', - dest='password', metavar='PASSWORD', help='account password') - authentication.add_option('-2', '--twofactor', - dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code') - authentication.add_option('-n', '--netrc', - action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) - authentication.add_option('--video-password', - dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)') - - - video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FORMAT', default=None, - help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') - video_format.add_option('--all-formats', - action='store_const', dest='format', help='download all available video formats', const='all') - video_format.add_option('--prefer-free-formats', - action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested') - video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') - video_format.add_option('-F', '--list-formats', - action='store_true', dest='listformats', help='list all available formats') - - subtitles.add_option('--write-sub', '--write-srt', - action='store_true', dest='writesubtitles', - help='write subtitle file', default=False) - subtitles.add_option('--write-auto-sub', '--write-automatic-sub', - action='store_true', dest='writeautomaticsub', - help='write automatic subtitle file (youtube only)', default=False) - subtitles.add_option('--all-subs', - action='store_true', dest='allsubtitles', - help='downloads all the available subtitles of the video', default=False) - subtitles.add_option('--list-subs', - action='store_true', dest='listsubtitles', - help='lists all available subtitles for the video', default=False) - subtitles.add_option('--sub-format', - action='store', dest='subtitlesformat', metavar='FORMAT', - help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') - subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang', - action='callback', dest='subtitleslangs', metavar='LANGS', type='str', - default=[], callback=_comma_separated_values_options_callback, - help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') - - downloader.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') - downloader.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) - downloader.add_option('--buffer-size', - dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024") - downloader.add_option('--no-resize-buffer', - action='store_true', dest='noresizebuffer', - help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) - downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) - - workarounds.add_option( - '--encoding', dest='encoding', metavar='ENCODING', - help='Force the specified encoding (experimental)') - workarounds.add_option( - '--no-check-certificate', action='store_true', - dest='no_check_certificate', default=False, - help='Suppress HTTPS certificate validation.') - workarounds.add_option( - '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', - help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') - workarounds.add_option( - '--user-agent', metavar='UA', - dest='user_agent', help='specify a custom user agent') - workarounds.add_option( - '--referer', metavar='REF', - dest='referer', default=None, - help='specify a custom referer, use if the video access is restricted to one domain', - ) - workarounds.add_option( - '--add-header', metavar='FIELD:VALUE', - dest='headers', action='append', - help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', - ) - workarounds.add_option( - '--bidi-workaround', dest='bidi_workaround', action='store_true', - help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') - - verbosity.add_option('-q', '--quiet', - action='store_true', dest='quiet', help='activates quiet mode', default=False) - verbosity.add_option( - '--no-warnings', - dest='no_warnings', action='store_true', default=False, - help='Ignore warnings') - verbosity.add_option('-s', '--simulate', - action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) - verbosity.add_option('--skip-download', - action='store_true', dest='skip_download', help='do not download the video', default=False) - verbosity.add_option('-g', '--get-url', - action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) - verbosity.add_option('-e', '--get-title', - action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) - verbosity.add_option('--get-id', - action='store_true', dest='getid', help='simulate, quiet but print id', default=False) - verbosity.add_option('--get-thumbnail', - action='store_true', dest='getthumbnail', - help='simulate, quiet but print thumbnail URL', default=False) - verbosity.add_option('--get-description', - action='store_true', dest='getdescription', - help='simulate, quiet but print video description', default=False) - verbosity.add_option('--get-duration', - action='store_true', dest='getduration', - help='simulate, quiet but print video length', default=False) - verbosity.add_option('--get-filename', - action='store_true', dest='getfilename', - help='simulate, quiet but print output filename', default=False) - verbosity.add_option('--get-format', - action='store_true', dest='getformat', - help='simulate, quiet but print output format', default=False) - verbosity.add_option('-j', '--dump-json', - action='store_true', dest='dumpjson', - help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False) - verbosity.add_option('--newline', - action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False) - verbosity.add_option('--no-progress', - action='store_true', dest='noprogress', help='do not print progress bar', default=False) - verbosity.add_option('--console-title', - action='store_true', dest='consoletitle', - help='display progress in console titlebar', default=False) - verbosity.add_option('-v', '--verbose', - action='store_true', dest='verbose', help='print various debugging information', default=False) - verbosity.add_option('--dump-intermediate-pages', - action='store_true', dest='dump_intermediate_pages', default=False, - help='print downloaded pages to debug problems (very verbose)') - verbosity.add_option('--write-pages', - action='store_true', dest='write_pages', default=False, - help='Write downloaded intermediary pages to files in the current directory to debug problems') - verbosity.add_option('--youtube-print-sig-code', - action='store_true', dest='youtube_print_sig_code', default=False, - help=optparse.SUPPRESS_HELP) - verbosity.add_option('--print-traffic', - dest='debug_printtraffic', action='store_true', default=False, - help='Display sent and read HTTP traffic') - - - filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') - filesystem.add_option('--id', - action='store_true', dest='useid', help='use only video ID in file name', default=False) - filesystem.add_option('-A', '--auto-number', - action='store_true', dest='autonumber', - help='number downloaded files starting from 00000', default=False) - filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', - help=('output filename template. Use %(title)s to get the title, ' - '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' - '%(autonumber)s to get an automatically incremented number, ' - '%(ext)s for the filename extension, ' - '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' - '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), ' - '%(upload_date)s for the upload date (YYYYMMDD), ' - '%(extractor)s for the provider (youtube, metacafe, etc), ' - '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' - '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' - '%(height)s and %(width)s for the width and height of the video format. ' - '%(resolution)s for a textual description of the resolution of the video format. ' - 'Use - to output to stdout. Can also be used to download to a different directory, ' - 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) - filesystem.add_option('--autonumber-size', - dest='autonumber_size', metavar='NUMBER', - help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') - filesystem.add_option('--restrict-filenames', - action='store_true', dest='restrictfilenames', - help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) - filesystem.add_option('-t', '--title', - action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False) - filesystem.add_option('-l', '--literal', - action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) - filesystem.add_option('-w', '--no-overwrites', - action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) - filesystem.add_option('-c', '--continue', - action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True) - filesystem.add_option('--no-continue', - action='store_false', dest='continue_dl', - help='do not resume partially downloaded files (restart from beginning)') - filesystem.add_option('--no-part', - action='store_true', dest='nopart', help='do not use .part files', default=False) - filesystem.add_option('--no-mtime', - action='store_false', dest='updatetime', - help='do not use the Last-modified header to set the file modification time', default=True) - filesystem.add_option('--write-description', - action='store_true', dest='writedescription', - help='write video description to a .description file', default=False) - filesystem.add_option('--write-info-json', - action='store_true', dest='writeinfojson', - help='write video metadata to a .info.json file', default=False) - filesystem.add_option('--write-annotations', - action='store_true', dest='writeannotations', - help='write video annotations to a .annotation file', default=False) - filesystem.add_option('--write-thumbnail', - action='store_true', dest='writethumbnail', - help='write thumbnail image to disk', default=False) - filesystem.add_option('--load-info', - dest='load_info_filename', metavar='FILE', - help='json file containing the video information (created with the "--write-json" option)') - filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') - filesystem.add_option( - '--cache-dir', dest='cachedir', default=None, metavar='DIR', - help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') - filesystem.add_option( - '--no-cache-dir', action='store_const', const=False, dest='cachedir', - help='Disable filesystem caching') - filesystem.add_option( - '--rm-cache-dir', action='store_true', dest='rm_cachedir', - help='Delete all filesystem cache files') - - - postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, - help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') - postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default') - postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', - help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') - postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, - help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') - postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, - help='keeps the video file on disk after the post-processing; the video is erased by default') - postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, - help='do not overwrite post-processed files; the post-processed files are overwritten by default') - postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='embed subtitles in the video (only for mp4 videos)') - postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, - help='embed thumbnail in the audio as cover art') - postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, - help='write metadata to the video file') - postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False, - help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)') - postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg', - help='Prefer avconv over ffmpeg for running the postprocessors (default)') - postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg', - help='Prefer ffmpeg over avconv for running the postprocessors') - postproc.add_option( - '--exec', metavar='CMD', dest='exec_cmd', - help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' ) - - parser.add_option_group(general) - parser.add_option_group(selection) - parser.add_option_group(downloader) - parser.add_option_group(filesystem) - parser.add_option_group(verbosity) - parser.add_option_group(workarounds) - parser.add_option_group(video_format) - parser.add_option_group(subtitles) - parser.add_option_group(authentication) - parser.add_option_group(postproc) - - if overrideArguments is not None: - opts, args = parser.parse_args(overrideArguments) - if opts.verbose: - write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') - else: - commandLineConf = sys.argv[1:] - if '--ignore-config' in commandLineConf: - systemConf = [] - userConf = [] - else: - systemConf = _readOptions('/etc/youtube-dl.conf') - if '--ignore-config' in systemConf: - userConf = [] - else: - userConf = _readUserConf() - argv = systemConf + userConf + commandLineConf - - opts, args = parser.parse_args(argv) - if opts.verbose: - write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') - write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') - write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') - - return parser, opts, args - - def _real_main(argv=None): # Compatibility fixes for Windows if sys.platform == 'win32': diff --git a/youtube_dl/options.py b/youtube_dl/options.py new file mode 100644 index 000000000..d8c161c9e --- /dev/null +++ b/youtube_dl/options.py @@ -0,0 +1,479 @@ +import os.path +import optparse +import shlex +import sys + +from .utils import ( + get_term_width, + write_string, +) +from .version import __version__ + + +def parseOpts(overrideArguments=None): + def _readOptions(filename_bytes, default=[]): + try: + optionf = open(filename_bytes) + except IOError: + return default # silently skip if file is not present + try: + res = [] + for l in optionf: + res += shlex.split(l, comments=True) + finally: + optionf.close() + return res + + def _readUserConf(): + xdg_config_home = os.environ.get('XDG_CONFIG_HOME') + if xdg_config_home: + userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') + if not os.path.isfile(userConfFile): + userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') + else: + userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') + if not os.path.isfile(userConfFile): + userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') + userConf = _readOptions(userConfFile, None) + + if userConf is None: + appdata_dir = os.environ.get('appdata') + if appdata_dir: + userConf = _readOptions( + os.path.join(appdata_dir, 'youtube-dl', 'config'), + default=None) + if userConf is None: + userConf = _readOptions( + os.path.join(appdata_dir, 'youtube-dl', 'config.txt'), + default=None) + + if userConf is None: + userConf = _readOptions( + os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), + default=None) + if userConf is None: + userConf = _readOptions( + os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), + default=None) + + if userConf is None: + userConf = [] + + return userConf + + def _format_option_string(option): + ''' ('-o', '--option') -> -o, --format METAVAR''' + + opts = [] + + if option._short_opts: + opts.append(option._short_opts[0]) + if option._long_opts: + opts.append(option._long_opts[0]) + if len(opts) > 1: + opts.insert(1, ', ') + + if option.takes_value(): opts.append(' %s' % option.metavar) + + return "".join(opts) + + def _comma_separated_values_options_callback(option, opt_str, value, parser): + setattr(parser.values, option.dest, value.split(',')) + + def _hide_login_info(opts): + opts = list(opts) + for private_opt in ['-p', '--password', '-u', '--username', '--video-password']: + try: + i = opts.index(private_opt) + opts[i+1] = '<PRIVATE>' + except ValueError: + pass + return opts + + max_width = 80 + max_help_position = 80 + + # No need to wrap help messages if we're on a wide console + columns = get_term_width() + if columns: max_width = columns + + fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) + fmt.format_option_strings = _format_option_string + + kw = { + 'version' : __version__, + 'formatter' : fmt, + 'usage' : '%prog [options] url [url...]', + 'conflict_handler' : 'resolve', + } + + parser = optparse.OptionParser(**kw) + + # option groups + general = optparse.OptionGroup(parser, 'General Options') + selection = optparse.OptionGroup(parser, 'Video Selection') + authentication = optparse.OptionGroup(parser, 'Authentication Options') + video_format = optparse.OptionGroup(parser, 'Video Format Options') + subtitles = optparse.OptionGroup(parser, 'Subtitle Options') + downloader = optparse.OptionGroup(parser, 'Download Options') + postproc = optparse.OptionGroup(parser, 'Post-processing Options') + filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + workarounds = optparse.OptionGroup(parser, 'Workarounds') + verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + + general.add_option('-h', '--help', + action='help', help='print this help text and exit') + general.add_option('-v', '--version', + action='version', help='print program version and exit') + general.add_option('-U', '--update', + action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') + general.add_option('-i', '--ignore-errors', + action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False) + general.add_option('--abort-on-error', + action='store_false', dest='ignoreerrors', + help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') + general.add_option('--dump-user-agent', + action='store_true', dest='dump_user_agent', + help='display the current browser identification', default=False) + general.add_option('--list-extractors', + action='store_true', dest='list_extractors', + help='List all supported extractors and the URLs they would handle', default=False) + general.add_option('--extractor-descriptions', + action='store_true', dest='list_extractor_descriptions', + help='Output descriptions of all supported extractors', default=False) + general.add_option( + '--proxy', dest='proxy', default=None, metavar='URL', + help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') + general.add_option( + '--socket-timeout', dest='socket_timeout', + type=float, default=None, help=u'Time to wait before giving up, in seconds') + general.add_option( + '--default-search', + dest='default_search', metavar='PREFIX', + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') + general.add_option( + '--ignore-config', + action='store_true', + help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') + + selection.add_option( + '--playlist-start', + dest='playliststart', metavar='NUMBER', default=1, type=int, + help='playlist video to start at (default is %default)') + selection.add_option( + '--playlist-end', + dest='playlistend', metavar='NUMBER', default=None, type=int, + help='playlist video to end at (default is last)') + selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') + selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') + selection.add_option('--max-downloads', metavar='NUMBER', + dest='max_downloads', type=int, default=None, + help='Abort after downloading NUMBER files') + selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) + selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) + selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) + selection.add_option( + '--datebefore', metavar='DATE', dest='datebefore', default=None, + help='download only videos uploaded on or before this date (i.e. inclusive)') + selection.add_option( + '--dateafter', metavar='DATE', dest='dateafter', default=None, + help='download only videos uploaded on or after this date (i.e. inclusive)') + selection.add_option( + '--min-views', metavar='COUNT', dest='min_views', + default=None, type=int, + help="Do not download any videos with less than COUNT views",) + selection.add_option( + '--max-views', metavar='COUNT', dest='max_views', + default=None, type=int, + help="Do not download any videos with more than COUNT views",) + selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) + selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', + help='download only videos suitable for the given age', + default=None, type=int) + selection.add_option('--download-archive', metavar='FILE', + dest='download_archive', + help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.') + selection.add_option( + '--include-ads', dest='include_ads', + action='store_true', + help='Download advertisements as well (experimental)') + selection.add_option( + '--youtube-include-dash-manifest', action='store_true', + dest='youtube_include_dash_manifest', default=False, + help='Try to download the DASH manifest on YouTube videos (experimental)') + + authentication.add_option('-u', '--username', + dest='username', metavar='USERNAME', help='account username') + authentication.add_option('-p', '--password', + dest='password', metavar='PASSWORD', help='account password') + authentication.add_option('-2', '--twofactor', + dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code') + authentication.add_option('-n', '--netrc', + action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + authentication.add_option('--video-password', + dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)') + + + video_format.add_option('-f', '--format', + action='store', dest='format', metavar='FORMAT', default=None, + help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') + video_format.add_option('--all-formats', + action='store_const', dest='format', help='download all available video formats', const='all') + video_format.add_option('--prefer-free-formats', + action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + video_format.add_option('-F', '--list-formats', + action='store_true', dest='listformats', help='list all available formats') + + subtitles.add_option('--write-sub', '--write-srt', + action='store_true', dest='writesubtitles', + help='write subtitle file', default=False) + subtitles.add_option('--write-auto-sub', '--write-automatic-sub', + action='store_true', dest='writeautomaticsub', + help='write automatic subtitle file (youtube only)', default=False) + subtitles.add_option('--all-subs', + action='store_true', dest='allsubtitles', + help='downloads all the available subtitles of the video', default=False) + subtitles.add_option('--list-subs', + action='store_true', dest='listsubtitles', + help='lists all available subtitles for the video', default=False) + subtitles.add_option('--sub-format', + action='store', dest='subtitlesformat', metavar='FORMAT', + help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') + subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang', + action='callback', dest='subtitleslangs', metavar='LANGS', type='str', + default=[], callback=_comma_separated_values_options_callback, + help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') + + downloader.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') + downloader.add_option('-R', '--retries', + dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) + downloader.add_option('--buffer-size', + dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024") + downloader.add_option('--no-resize-buffer', + action='store_true', dest='noresizebuffer', + help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) + downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) + + workarounds.add_option( + '--encoding', dest='encoding', metavar='ENCODING', + help='Force the specified encoding (experimental)') + workarounds.add_option( + '--no-check-certificate', action='store_true', + dest='no_check_certificate', default=False, + help='Suppress HTTPS certificate validation.') + workarounds.add_option( + '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', + help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') + workarounds.add_option( + '--user-agent', metavar='UA', + dest='user_agent', help='specify a custom user agent') + workarounds.add_option( + '--referer', metavar='REF', + dest='referer', default=None, + help='specify a custom referer, use if the video access is restricted to one domain', + ) + workarounds.add_option( + '--add-header', metavar='FIELD:VALUE', + dest='headers', action='append', + help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', + ) + workarounds.add_option( + '--bidi-workaround', dest='bidi_workaround', action='store_true', + help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') + + verbosity.add_option('-q', '--quiet', + action='store_true', dest='quiet', help='activates quiet mode', default=False) + verbosity.add_option( + '--no-warnings', + dest='no_warnings', action='store_true', default=False, + help='Ignore warnings') + verbosity.add_option('-s', '--simulate', + action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) + verbosity.add_option('--skip-download', + action='store_true', dest='skip_download', help='do not download the video', default=False) + verbosity.add_option('-g', '--get-url', + action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) + verbosity.add_option('-e', '--get-title', + action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--get-id', + action='store_true', dest='getid', help='simulate, quiet but print id', default=False) + verbosity.add_option('--get-thumbnail', + action='store_true', dest='getthumbnail', + help='simulate, quiet but print thumbnail URL', default=False) + verbosity.add_option('--get-description', + action='store_true', dest='getdescription', + help='simulate, quiet but print video description', default=False) + verbosity.add_option('--get-duration', + action='store_true', dest='getduration', + help='simulate, quiet but print video length', default=False) + verbosity.add_option('--get-filename', + action='store_true', dest='getfilename', + help='simulate, quiet but print output filename', default=False) + verbosity.add_option('--get-format', + action='store_true', dest='getformat', + help='simulate, quiet but print output format', default=False) + verbosity.add_option('-j', '--dump-json', + action='store_true', dest='dumpjson', + help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False) + verbosity.add_option('--newline', + action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False) + verbosity.add_option('--no-progress', + action='store_true', dest='noprogress', help='do not print progress bar', default=False) + verbosity.add_option('--console-title', + action='store_true', dest='consoletitle', + help='display progress in console titlebar', default=False) + verbosity.add_option('-v', '--verbose', + action='store_true', dest='verbose', help='print various debugging information', default=False) + verbosity.add_option('--dump-intermediate-pages', + action='store_true', dest='dump_intermediate_pages', default=False, + help='print downloaded pages to debug problems (very verbose)') + verbosity.add_option('--write-pages', + action='store_true', dest='write_pages', default=False, + help='Write downloaded intermediary pages to files in the current directory to debug problems') + verbosity.add_option('--youtube-print-sig-code', + action='store_true', dest='youtube_print_sig_code', default=False, + help=optparse.SUPPRESS_HELP) + verbosity.add_option('--print-traffic', + dest='debug_printtraffic', action='store_true', default=False, + help='Display sent and read HTTP traffic') + + + filesystem.add_option('-a', '--batch-file', + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') + filesystem.add_option('--id', + action='store_true', dest='useid', help='use only video ID in file name', default=False) + filesystem.add_option('-A', '--auto-number', + action='store_true', dest='autonumber', + help='number downloaded files starting from 00000', default=False) + filesystem.add_option('-o', '--output', + dest='outtmpl', metavar='TEMPLATE', + help=('output filename template. Use %(title)s to get the title, ' + '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' + '%(autonumber)s to get an automatically incremented number, ' + '%(ext)s for the filename extension, ' + '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' + '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), ' + '%(upload_date)s for the upload date (YYYYMMDD), ' + '%(extractor)s for the provider (youtube, metacafe, etc), ' + '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' + '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' + '%(height)s and %(width)s for the width and height of the video format. ' + '%(resolution)s for a textual description of the resolution of the video format. ' + 'Use - to output to stdout. Can also be used to download to a different directory, ' + 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) + filesystem.add_option('--autonumber-size', + dest='autonumber_size', metavar='NUMBER', + help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') + filesystem.add_option('--restrict-filenames', + action='store_true', dest='restrictfilenames', + help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False) + filesystem.add_option('-l', '--literal', + action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) + filesystem.add_option('-w', '--no-overwrites', + action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) + filesystem.add_option('-c', '--continue', + action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True) + filesystem.add_option('--no-continue', + action='store_false', dest='continue_dl', + help='do not resume partially downloaded files (restart from beginning)') + filesystem.add_option('--no-part', + action='store_true', dest='nopart', help='do not use .part files', default=False) + filesystem.add_option('--no-mtime', + action='store_false', dest='updatetime', + help='do not use the Last-modified header to set the file modification time', default=True) + filesystem.add_option('--write-description', + action='store_true', dest='writedescription', + help='write video description to a .description file', default=False) + filesystem.add_option('--write-info-json', + action='store_true', dest='writeinfojson', + help='write video metadata to a .info.json file', default=False) + filesystem.add_option('--write-annotations', + action='store_true', dest='writeannotations', + help='write video annotations to a .annotation file', default=False) + filesystem.add_option('--write-thumbnail', + action='store_true', dest='writethumbnail', + help='write thumbnail image to disk', default=False) + filesystem.add_option('--load-info', + dest='load_info_filename', metavar='FILE', + help='json file containing the video information (created with the "--write-json" option)') + filesystem.add_option('--cookies', + dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') + filesystem.add_option( + '--cache-dir', dest='cachedir', default=None, metavar='DIR', + help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') + filesystem.add_option( + '--no-cache-dir', action='store_const', const=False, dest='cachedir', + help='Disable filesystem caching') + filesystem.add_option( + '--rm-cache-dir', action='store_true', dest='rm_cachedir', + help='Delete all filesystem cache files') + + + postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, + help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') + postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', + help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default') + postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', + help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') + postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, + help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') + postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, + help='keeps the video file on disk after the post-processing; the video is erased by default') + postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, + help='do not overwrite post-processed files; the post-processed files are overwritten by default') + postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, + help='embed subtitles in the video (only for mp4 videos)') + postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, + help='embed thumbnail in the audio as cover art') + postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, + help='write metadata to the video file') + postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False, + help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)') + postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg', + help='Prefer avconv over ffmpeg for running the postprocessors (default)') + postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg', + help='Prefer ffmpeg over avconv for running the postprocessors') + postproc.add_option( + '--exec', metavar='CMD', dest='exec_cmd', + help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' ) + + parser.add_option_group(general) + parser.add_option_group(selection) + parser.add_option_group(downloader) + parser.add_option_group(filesystem) + parser.add_option_group(verbosity) + parser.add_option_group(workarounds) + parser.add_option_group(video_format) + parser.add_option_group(subtitles) + parser.add_option_group(authentication) + parser.add_option_group(postproc) + + if overrideArguments is not None: + opts, args = parser.parse_args(overrideArguments) + if opts.verbose: + write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') + else: + commandLineConf = sys.argv[1:] + if '--ignore-config' in commandLineConf: + systemConf = [] + userConf = [] + else: + systemConf = _readOptions('/etc/youtube-dl.conf') + if '--ignore-config' in systemConf: + userConf = [] + else: + userConf = _readUserConf() + argv = systemConf + userConf + commandLineConf + + opts, args = parser.parse_args(argv) + if opts.verbose: + write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') + write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') + write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') + + return parser, opts, args -- cgit v1.2.3 From 2daabe49921227f1a2fc3fd4234e6ecfa7af4cfd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:58:44 +0200 Subject: [options] Correct option usage --- youtube_dl/__init__.py | 4 +++- youtube_dl/options.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cc0a4071f..42d0a0180 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -86,6 +86,9 @@ import random import sys +from .options import ( + parseOpts, +) from .utils import ( compat_getpass, compat_print, @@ -94,7 +97,6 @@ from .utils import ( decodeOption, DownloadError, MaxDownloadsReached, - parseOpts, preferredencoding, read_batch_urls, SameFileError, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index d8c161c9e..31baab469 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import os.path import optparse import shlex -- cgit v1.2.3 From 652f283135705f5734d44913811396dac4a98b29 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 07:59:33 +0200 Subject: [youporn] Remove md5 from test Has changed multiple times, and we're checking that it looks like a video now by default. --- youtube_dl/extractor/youporn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index d456c4da5..7bfda45e7 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -23,7 +23,6 @@ class YouPornIE(InfoExtractor): _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))' _TEST = { 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', - 'md5': '71ec5fcfddacf80f495efa8b6a8d9a89', 'info_dict': { 'id': '505835', 'ext': 'mp4', -- cgit v1.2.3 From 4eefbfdbfd472398ed5e40b13d20e3a92f837f52 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 08:34:15 +0200 Subject: [utils] Fix find_xpath_attr on 2.6 --- youtube_dl/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d920c65a4..8828161e5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -280,6 +280,11 @@ if sys.version_info >= (2, 7): return node.find(expr) else: def find_xpath_attr(node, xpath, key, val): + # Here comes the crazy part: In 2.6, if the xpath is a unicode, + # .//node does not match if a node is a direct child of . ! + if isinstance(xpath, unicode): + xpath = xpath.encode('ascii') + for f in node.findall(xpath): if f.attrib.get(key) == val: return f -- cgit v1.2.3 From 6539c91a7806112be9750a72c154f5a1f99e6c78 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 08:51:21 +0200 Subject: [deezer] Add support for geoblocking --- youtube_dl/extractor/deezer.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py index 62c181529..c3205ff5f 100644 --- a/youtube_dl/extractor/deezer.py +++ b/youtube_dl/extractor/deezer.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, orderedSet, ) @@ -21,6 +22,7 @@ class DeezerPlaylistIE(InfoExtractor): 'thumbnail': 're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$', }, 'playlist_count': 30, + 'skip': 'Only available in .de', } def _real_extract(self, url): @@ -31,6 +33,13 @@ class DeezerPlaylistIE(InfoExtractor): playlist_id = mobj.group('id') webpage = self._download_webpage(url, playlist_id) + geoblocking_msg = self._html_search_regex( + r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message', + default=None) + if geoblocking_msg is not None: + raise ExtractorError( + 'Deezer said: %s' % geoblocking_msg, expected=True) + data_json = self._search_regex( r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n', webpage, 'data JSON') data = json.loads(data_json) -- cgit v1.2.3 From 50317dbbada099eaefdc4bc56fff8109907a827a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 09:01:57 +0200 Subject: [facebook] Fix and caption if title is empty --- youtube_dl/extractor/facebook.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index afb34ce51..d675a939d 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -29,13 +29,13 @@ class FacebookIE(InfoExtractor): _NETRC_MACHINE = 'facebook' IE_NAME = 'facebook' _TESTS = [{ - 'url': 'https://www.facebook.com/photo.php?v=120708114770723', - 'md5': '48975a41ccc4b7a581abd68651c1a5a8', + 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', + 'md5': '6a40d33c0eccbb1af76cf0485a052659', 'info_dict': { - 'id': '120708114770723', + 'id': '637842556329505', 'ext': 'mp4', - 'duration': 279, - 'title': 'PEOPLE ARE AWESOME 2013', + 'duration': 38, + 'title': 'Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam fin...', } }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', @@ -125,7 +125,16 @@ class FacebookIE(InfoExtractor): raise ExtractorError('Cannot find video URL') video_title = self._html_search_regex( - r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title') + r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title', + fatal=False) + if not video_title: + video_title = self._html_search_regex( + r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>', + webpage, 'alternative title', default=None) + if len(video_title) > 80 + 3: + video_title = video_title[:80] + '...' + if not video_title: + video_title = 'Facebook video #%s' % video_id return { 'id': video_id, -- cgit v1.2.3 From 3a36d3782e220f36870e94fa577a3f01e0040ca6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 09:04:03 +0200 Subject: [chilloutzone] Make test case more flexible --- youtube_dl/extractor/chilloutzone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py index a62395d4b..c922f6959 100644 --- a/youtube_dl/extractor/chilloutzone.py +++ b/youtube_dl/extractor/chilloutzone.py @@ -42,7 +42,7 @@ class ChilloutzoneIE(InfoExtractor): 'id': '85523671', 'ext': 'mp4', 'title': 'The Sunday Times - Icons', - 'description': 'md5:a5f7ff82e2f7a9ed77473fe666954e84', + 'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}', 'uploader': 'Us', 'uploader_id': 'usfilms', 'upload_date': '20140131' -- cgit v1.2.3 From dc752ff442f74926b3301cf26f6e418e5cf8ec7f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 09:04:48 +0200 Subject: [br] Remove deleted video test case --- youtube_dl/extractor/br.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 86f0c2861..4e2960c62 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -28,17 +28,6 @@ class BRIE(InfoExtractor): 'duration': 34, } }, - { - 'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html', - 'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe', - 'info_dict': { - 'id': '2c060e69-3a27-4e13-b0f0-668fac17d812', - 'ext': 'mp4', - 'title': 'Über den Pass', - 'description': 'Die Eroberung der Alpen: Über den Pass', - 'duration': 2588, - } - }, { 'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html', 'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820', -- cgit v1.2.3 From bf0ff93277ba36fbda70223ca7e78b5132e54ddf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 09:09:55 +0200 Subject: [ard] Make more robust against missing thumbnails I cannot reproduce this error, it's from travis. --- youtube_dl/extractor/ard.py | 6 ++++-- youtube_dl/utils.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index ef94c7239..12457f0f9 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -13,6 +13,7 @@ from ..utils import ( int_or_none, parse_duration, unified_strdate, + xpath_text, ) @@ -157,8 +158,9 @@ class ARDIE(InfoExtractor): player_url = mobj.group('mainurl') + '~playerXml.xml' doc = self._download_xml(player_url, display_id) video_node = doc.find('./video') - upload_date = unified_strdate(video_node.find('./broadcastDate').text) - thumbnail = video_node.find('.//teaserImage//variant/url').text + upload_date = unified_strdate(xpath_text( + video_node, './broadcastDate')) + thumbnail = xpath_text(video_node, './/teaserImage//variant/url') formats = [] for a in video_node.findall('.//asset'): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8828161e5..7536b3b36 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -304,6 +304,17 @@ def xpath_with_ns(path, ns_map): return '/'.join(replaced) +def xpath_text(node, xpath, name=None, fatal=False): + n = node.find(xpath) + if n is None: + if fatal: + name = xpath if name is None else name + raise ExtractorError('Could not find XML element %s' % name) + else: + return None + return n.text + + compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix class BaseHTMLParser(compat_html_parser.HTMLParser): def __init(self): -- cgit v1.2.3 From d74bebd50263a2f744595b9a54825914bc07657b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 09:11:14 +0200 Subject: [utils] Apply 2.6 xpath craziness This fixes ARD on 2.6 --- youtube_dl/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 7536b3b36..247788078 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -305,6 +305,9 @@ def xpath_with_ns(path, ns_map): def xpath_text(node, xpath, name=None, fatal=False): + if sys.version_info < (2, 7): # Crazy 2.6 + xpath = xpath.encode('ascii') + n = node.find(xpath) if n is None: if fatal: -- cgit v1.2.3 From 7e6a715380fa0a99e496e1eab6162b4d7a115dc6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 13 Sep 2014 09:13:20 +0200 Subject: [adultswim] Avoid extremely long lines --- youtube_dl/extractor/adultswim.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index a00bfcb35..b4b40f2d4 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -75,7 +75,9 @@ class AdultSwimIE(InfoExtractor): video_path = mobj.group('path') webpage = self._download_webpage(url, video_path) - episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id') + episode_id = self._html_search_regex( + r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', + webpage, 'episode_id') title = self._og_search_title(webpage) index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id @@ -97,7 +99,9 @@ class AdultSwimIE(InfoExtractor): duration = segment_el.attrib.get('duration') segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id - idoc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information') + idoc = self._download_xml( + segment_url, segment_title, + 'Downloading segment information', 'Unable to download segment information') formats = [] file_els = idoc.findall('.//files/file') -- cgit v1.2.3 From d05cfe06006c4a44032e95dde047d5e12be8674c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 13 Sep 2014 20:59:16 +0700 Subject: [YoutubeDL/utils] Clarify rationale for URL escaping in comment, move escape routines to utils and add some tests --- test/test_utils.py | 31 +++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 20 +++++++------------- youtube_dl/utils.py | 18 ++++++++++++++++++ 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8d8997977..e90caed29 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -40,6 +40,8 @@ from youtube_dl.utils import ( parse_iso8601, strip_jsonp, uppercase_escape, + escape_rfc3986, + escape_url, ) @@ -286,5 +288,34 @@ class TestUtil(unittest.TestCase): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') + def test_escape_rfc3986(self): + reserved = "!*'();:@&=+$,/?#[]" + unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' + self.assertEqual(escape_rfc3986(reserved), reserved) + self.assertEqual(escape_rfc3986(unreserved), unreserved) + self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82') + self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82') + self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar') + self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar') + + def test_escape_url(self): + self.assertEqual( + escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), + 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4' + ) + self.assertEqual( + escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), + 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290' + ) + self.assertEqual( + escape_url('http://тест.рф/фрагмент'), + 'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' + ) + self.assertEqual( + escape_url('http://тест.рф/абв?абв=абв#абв'), + 'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' + ) + self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 99cd05e65..9519594c9 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -28,7 +28,7 @@ from .utils import ( compat_str, compat_urllib_error, compat_urllib_request, - compat_urllib_parse_urlparse, + escape_url, ContentTooShortError, date_from_str, DateRange, @@ -1243,20 +1243,14 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ + # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not + # always respected by websites, some tend to give out URLs with non percent-encoded + # non-ASCII characters (see telemb.py, ard.py [#3412]) # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) - # Working around by replacing request's original URL with escaped one - + # To work around aforementioned issue we will replace request's original URL with + # percent-encoded one url = req if isinstance(req, compat_str) else req.get_full_url() - - def escape(component): - return compat_cookiejar.escape_path(component.encode('utf-8')) - - url_parsed = compat_urllib_parse_urlparse(url) - url_escaped = url_parsed._replace( - path=escape(url_parsed.path), - query=escape(url_parsed.query), - fragment=escape(url_parsed.fragment) - ).geturl() + url_escaped = escape_url(url) # Substitute URL if any change after escaping if url != url_escaped: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d920c65a4..9124c3621 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1418,6 +1418,24 @@ def uppercase_escape(s): lambda m: unicode_escape(m.group(0))[0], s) + +def escape_rfc3986(s): + """Escape non-ASCII characters as suggested by RFC 3986""" + if sys.version_info < (3, 0) and isinstance(s, unicode): + s = s.encode('utf-8') + return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]") #"%/;:@&=+$,!~*'()?#[]+" #?#[]+ + + +def escape_url(url): + """Escape URL as suggested by RFC 3986""" + url_parsed = compat_urllib_parse_urlparse(url) + return url_parsed._replace( + path=escape_rfc3986(url_parsed.path), + params=escape_rfc3986(url_parsed.params), + query=escape_rfc3986(url_parsed.query), + fragment=escape_rfc3986(url_parsed.fragment) + ).geturl() + try: struct.pack(u'!I', 0) except TypeError: -- cgit v1.2.3 From 984e8e14ea266d406c253098f953e727ca8c19c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 13 Sep 2014 21:08:04 +0700 Subject: [utils] Remove debug garbage --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9124c3621..e924b1688 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1423,7 +1423,7 @@ def escape_rfc3986(s): """Escape non-ASCII characters as suggested by RFC 3986""" if sys.version_info < (3, 0) and isinstance(s, unicode): s = s.encode('utf-8') - return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]") #"%/;:@&=+$,!~*'()?#[]+" #?#[]+ + return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]") def escape_url(url): -- cgit v1.2.3 From ceff3fd8ef8d22df88cee55b6e4a9f783c3dd8cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 13 Sep 2014 21:45:53 +0700 Subject: [pornhd] Fix extraction (Closes #3739) --- youtube_dl/extractor/pornhd.py | 51 ++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 718fe9aba..48ce6e730 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -27,47 +27,40 @@ class PornHdIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) - TITLE_SUFFIX = ' porn HD Video | PornHD.com ' - if title.endswith(TITLE_SUFFIX): - title = title[:-len(TITLE_SUFFIX)] - + title = self._html_search_regex( + r'<title>(.+) porn HD.+?', webpage, 'title') description = self._html_search_regex( r'
([^<]+)
', webpage, 'description', fatal=False) view_count = int_or_none(self._html_search_regex( - r'(\d+) views ', webpage, 'view count', fatal=False)) + r'(\d+) views\s*', webpage, 'view count', fatal=False)) - formats = [ - { - 'url': format_url, - 'ext': format.lower(), - 'format_id': '%s-%s' % (format.lower(), quality.lower()), - 'quality': 1 if quality.lower() == 'high' else 0, - } for format, quality, format_url in re.findall( - r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage) - ] + videos = re.findall( + r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage) mobj = re.search(r'flashVars = (?P{.+?});', webpage) if mobj: flashvars = json.loads(mobj.group('flashvars')) - formats.extend([ - { - 'url': flashvars['hashlink'].replace('?noProxy=1', ''), - 'ext': 'flv', - 'format_id': 'flv-low', - 'quality': 0, - }, - { - 'url': flashvars['hd'].replace('?noProxy=1', ''), - 'ext': 'flv', - 'format_id': 'flv-high', - 'quality': 1, - } - ]) + for key, quality in [('hashlink', 'low'), ('hd', 'high')]: + redirect_url = flashvars.get(key) + if redirect_url: + videos.append(('flv', quality, redirect_url)) thumbnail = flashvars['urlWallpaper'] else: thumbnail = self._og_search_thumbnail(webpage) + formats = [] + for format_, quality, redirect_url in videos: + format_id = '%s-%s' % (format_.lower(), quality.lower()) + video_url = self._download_webpage( + redirect_url, video_id, 'Downloading %s video link' % format_id, fatal=False) + if not video_url: + continue + formats.append({ + 'url': video_url, + 'ext': format_.lower(), + 'format_id': format_id, + 'quality': 1 if quality.lower() == 'high' else 0, + }) self._sort_formats(formats) return { -- cgit v1.2.3 From e5a93354bc6df76ed07c2e77a68203374b7ddf5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Sep 2014 22:07:09 +0700 Subject: [drtv] Fix _VALID_URL (Closes #3735) --- youtube_dl/extractor/drtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index cdccfd376..9d6ce1f48 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -8,7 +8,7 @@ from ..utils import parse_iso8601 class DRTVIE(SubtitlesInfoExtractor): - _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P[\da-z-]+)' + _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P[\da-z-]+)(?:[/#?]|$)' _TEST = { 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', -- cgit v1.2.3 From 2bca84e345b3f64f832128dbe24b3384e7ec9751 Mon Sep 17 00:00:00 2001 From: 5moufl Date: Sat, 13 Sep 2014 17:47:19 +0200 Subject: [BehindKink] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/behindkink.py | 56 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 youtube_dl/extractor/behindkink.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5e7a0a775..5a02bea05 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -25,6 +25,7 @@ from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE from .bbccouk import BBCCoUkIE from .beeg import BeegIE +from .behindkink import BehindKinkIE from .bilibili import BiliBiliIE from .blinkx import BlinkxIE from .bliptv import BlipTVIE, BlipTVUserIE diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py new file mode 100644 index 000000000..f0a86fda3 --- /dev/null +++ b/youtube_dl/extractor/behindkink.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import url_basename + + +class BehindKinkIE(InfoExtractor): + _VALID_URL = r'(?:http://)(?:www\.)?behindkink\.com/(?P[0-9]{4})/(?P[0-9]{2})/(?P[0-9]{2})/(?P[^/?_]+)' + _TEST = { + 'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/', + 'md5': '41ad01222b8442089a55528fec43ec01', + 'info_dict': { + 'id': '36370', + 'ext': 'mp4', + 'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!', + 'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...', + 'upload_date': '20140814', + 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + year = mobj.group('year') + month = mobj.group('month') + day = mobj.group('day') + upload_date = year + month + day + + webpage_url = 'http://www.behindkink.com/' + year + '/' + month + '/' + day + '/' + display_id + webpage = self._download_webpage(webpage_url, display_id) + + self.report_extraction(display_id) + video_url = self._search_regex( + r"'file':\s*'([^']+)'", + webpage, 'URL base') + + video_id = url_basename(video_url) + video_id = video_id.split('_')[0] + self.report_extraction(video_id) + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': self._og_search_title(webpage), + 'display_id': display_id, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'upload_date': upload_date, + 'age_limit': 18, + } -- cgit v1.2.3 From 20ff802c9bcff954ac10b10e9254078f308f037d Mon Sep 17 00:00:00 2001 From: naglis Date: Sun, 14 Sep 2014 00:12:36 +0300 Subject: [cloudy] Add new extractor. Closes #3743 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/cloudy.py | 67 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/cloudy.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1bf5c51b4..48683ebcc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -46,6 +46,7 @@ from .cinemassacre import CinemassacreIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .clipsyndicate import ClipsyndicateIE +from .cloudy import CloudyIE from .clubic import ClubicIE from .cmt import CMTIE from .cnet import CNETIE diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py new file mode 100644 index 000000000..73c6e3d49 --- /dev/null +++ b/youtube_dl/extractor/cloudy.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_parse_qs, + compat_urllib_parse, +) + + +class CloudyIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?:www\.)?cloudy\.ec/ + (?:v/|embed\.php\?id=) + (?P[A-Za-z0-9]+) + ''' + _API_URL = 'http://www.cloudy.ec/api/player.api.php?%s' + _TEST = { + 'url': 'https://www.cloudy.ec/v/af511e2527aac', + 'md5': '5cb253ace826a42f35b4740539bedf07', + 'info_dict': { + 'id': 'af511e2527aac', + 'ext': 'flv', + 'title': 'Funny Cats and Animals Compilation june 2013', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://www.cloudy.ec/embed.php?id=%s' % video_id + webpage = self._download_webpage(url, video_id) + + file_key = self._search_regex( + r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') + data_url = self._API_URL % compat_urllib_parse.urlencode({ + 'file': video_id, + 'key': file_key, + }) + player_data = self._download_webpage( + data_url, video_id, 'Downloading player data') + data = compat_parse_qs(player_data) + + if 'error' in data: + raise ExtractorError( + '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])), + expected=True) + + title = data.get('title', [None])[0] + if title: + title = title.replace('&asdasdas', '').strip() + + formats = [] + formats.append({ + 'format_id': 'sd', + 'url': data.get('url', [None])[0], + }) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } -- cgit v1.2.3 From 5dbf3b5c60e99585697cea95a34aa8fd6c109827 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Sep 2014 05:01:25 +0700 Subject: [cloudy] Minor changes --- youtube_dl/extractor/cloudy.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 73c6e3d49..95eda2e58 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -8,6 +8,7 @@ from ..utils import ( ExtractorError, compat_parse_qs, compat_urllib_parse, + remove_end, ) @@ -52,13 +53,15 @@ class CloudyIE(InfoExtractor): title = data.get('title', [None])[0] if title: - title = title.replace('&asdasdas', '').strip() + title = remove_end(title, '&asdasdas').strip() formats = [] - formats.append({ - 'format_id': 'sd', - 'url': data.get('url', [None])[0], - }) + video_url = data.get('url', [None])[0] + if video_url: + formats.append({ + 'format_id': 'sd', + 'url': video_url, + }) return { 'id': video_id, -- cgit v1.2.3 From 98676c08a1c4977f945f8e83c31c227f337176ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Sep 2014 05:17:21 +0700 Subject: [cloudy] Add support for videoraj.ch --- youtube_dl/extractor/cloudy.py | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 95eda2e58..bfdfbcfe6 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -13,35 +13,49 @@ from ..utils import ( class CloudyIE(InfoExtractor): + _IE_DESC = 'cloudy.ec and videoraj.ch' _VALID_URL = r'''(?x) - https?://(?:www\.)?cloudy\.ec/ + https?://(?:www\.)?(?Pcloudy\.ec|videoraj\.ch)/ (?:v/|embed\.php\?id=) (?P[A-Za-z0-9]+) ''' - _API_URL = 'http://www.cloudy.ec/api/player.api.php?%s' - _TEST = { - 'url': 'https://www.cloudy.ec/v/af511e2527aac', - 'md5': '5cb253ace826a42f35b4740539bedf07', - 'info_dict': { - 'id': 'af511e2527aac', - 'ext': 'flv', - 'title': 'Funny Cats and Animals Compilation june 2013', + _EMBED_URL = 'http://www.%s/embed.php?id=%s' + _API_URL = 'http://www.%s/api/player.api.php?%s' + _TESTS = [ + { + 'url': 'https://www.cloudy.ec/v/af511e2527aac', + 'md5': '5cb253ace826a42f35b4740539bedf07', + 'info_dict': { + 'id': 'af511e2527aac', + 'ext': 'flv', + 'title': 'Funny Cats and Animals Compilation june 2013', + } + }, + { + 'url': 'http://www.videoraj.ch/v/47f399fd8bb60', + 'md5': '7d0f8799d91efd4eda26587421c3c3b0', + 'info_dict': { + 'id': '47f399fd8bb60', + 'ext': 'flv', + 'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?', + } } - } + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + video_host = mobj.group('host') video_id = mobj.group('id') - url = 'http://www.cloudy.ec/embed.php?id=%s' % video_id + url = self._EMBED_URL % (video_host, video_id) webpage = self._download_webpage(url, video_id) file_key = self._search_regex( r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') - data_url = self._API_URL % compat_urllib_parse.urlencode({ + data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode({ 'file': video_id, 'key': file_key, - }) + })) player_data = self._download_webpage( data_url, video_id, 'Downloading player data') data = compat_parse_qs(player_data) -- cgit v1.2.3 From 3da0e1f8cd366fe64e0c1d072b77f5ce5ad9ba93 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 14 Sep 2014 12:20:14 +0200 Subject: [tumblr] Modernize --- youtube_dl/extractor/tumblr.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 2882c1809..306fe8974 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -10,7 +10,7 @@ from ..utils import ( class TumblrIE(InfoExtractor): - _VALID_URL = r'http://(?P.*?)\.tumblr\.com/((post)|(video))/(?P\d*)($|/)' + _VALID_URL = r'http://(?P.*?)\.tumblr\.com/(?:post|video)/(?P[0-9]+)(?:$|[/?#])' _TESTS = [{ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', 'md5': '479bb068e5b16462f5176a6828829767', @@ -56,13 +56,15 @@ class TumblrIE(InfoExtractor): # The only place where you can get a title, it's not complete, # but searching in other places doesn't work for all videos - video_title = self._html_search_regex(r'(?P<title>.*?)(?: \| Tumblr)?', - webpage, 'title', flags=re.DOTALL) + video_title = self._html_search_regex( + r'(?s)(?P<title>.*?)(?: \| Tumblr)?', + webpage, 'title') - return [{'id': video_id, - 'url': video_url, - 'title': video_title, - 'description': self._html_search_meta('description', webpage), - 'thumbnail': video_thumbnail, - 'ext': ext - }] + return { + 'id': video_id, + 'url': video_url, + 'title': video_title, + 'description': self._html_search_meta('description', webpage), + 'thumbnail': video_thumbnail, + 'ext': ext, + } -- cgit v1.2.3 From ae91f6f99be21e95c8503c184b1397c6faf003d2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 14 Sep 2014 12:20:27 +0200 Subject: release 2014.09.14 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7496e9296..c602fcd9e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.12' +__version__ = '2014.09.14' -- cgit v1.2.3 From 11ab41658e050b1540344233750615086ef24af7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 14 Sep 2014 12:23:37 +0200 Subject: [devscripts/release] Do not restore youtube-dl{,.exe} binaries --- devscripts/release.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 453087e5f..691517ceb 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -73,7 +73,6 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" (cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS) (cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS) (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) -git checkout HEAD -- youtube-dl youtube-dl.exe /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done -- cgit v1.2.3 From 5ba693d0825fc84dd3441f305d6d48e5332aac75 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 14 Sep 2014 12:23:41 +0200 Subject: release 2014.09.14.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c602fcd9e..4846848df 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.14' +__version__ = '2014.09.14.1' -- cgit v1.2.3 From 3b11e86eeba992f5fafda8ef4b73b2ef105ab85f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 14 Sep 2014 12:56:04 +0200 Subject: release 2014.09.14.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4846848df..1c9f978af 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.14.1' +__version__ = '2014.09.14.2' -- cgit v1.2.3 From eb3bd7ba8db8a31f5262d2d8f335700d1664a1bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Sep 2014 19:04:16 +0700 Subject: [cloudy] Retry extraction on 410 status code (#3743 #3744) --- youtube_dl/extractor/cloudy.py | 56 ++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index bfdfbcfe6..386f080d2 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -9,6 +9,8 @@ from ..utils import ( compat_parse_qs, compat_urllib_parse, remove_end, + HEADRequest, + compat_HTTPError, ) @@ -21,6 +23,7 @@ class CloudyIE(InfoExtractor): ''' _EMBED_URL = 'http://www.%s/embed.php?id=%s' _API_URL = 'http://www.%s/api/player.api.php?%s' + _MAX_TRIES = 2 _TESTS = [ { 'url': 'https://www.cloudy.ec/v/af511e2527aac', @@ -42,24 +45,30 @@ class CloudyIE(InfoExtractor): } ] - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_host = mobj.group('host') - video_id = mobj.group('id') + def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0): - url = self._EMBED_URL % (video_host, video_id) - webpage = self._download_webpage(url, video_id) + if try_num > self._MAX_TRIES - 1: + raise ExtractorError('Unable to extract video URL', expected=True) - file_key = self._search_regex( - r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') - data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode({ + form = { 'file': video_id, 'key': file_key, - })) + } + + if error_url: + form.update({ + 'numOfErrors': try_num, + 'errorCode': '404', + 'errorUrl': error_url, + }) + + data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form)) player_data = self._download_webpage( data_url, video_id, 'Downloading player data') data = compat_parse_qs(player_data) + try_num += 1 + if 'error' in data: raise ExtractorError( '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])), @@ -69,16 +78,31 @@ class CloudyIE(InfoExtractor): if title: title = remove_end(title, '&asdasdas').strip() - formats = [] video_url = data.get('url', [None])[0] + if video_url: - formats.append({ - 'format_id': 'sd', - 'url': video_url, - }) + try: + self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL') + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]: + self.report_warning('Invalid video URL, requesting another', video_id) + return self._extract_video(video_host, video_id, file_key, video_url, try_num) return { 'id': video_id, + 'url': video_url, 'title': title, - 'formats': formats, } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_host = mobj.group('host') + video_id = mobj.group('id') + + url = self._EMBED_URL % (video_host, video_id) + webpage = self._download_webpage(url, video_id) + + file_key = self._search_regex( + r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') + + return self._extract_video(video_host, video_id, file_key) -- cgit v1.2.3 From 56d1912f1d8b886ef0355565e2a4e24560d8a7a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 14 Sep 2014 14:07:33 +0200 Subject: Add a completion script generator for the fish shell --- .gitignore | 1 + MANIFEST.in | 1 + Makefile | 20 ++++++++++++------ devscripts/fish-completion.in | 5 +++++ devscripts/fish-completion.py | 48 +++++++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 6 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 devscripts/fish-completion.in create mode 100755 devscripts/fish-completion.py diff --git a/.gitignore b/.gitignore index b8128fab1..e44977ca3 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ MANIFEST README.txt youtube-dl.1 youtube-dl.bash-completion +youtube-dl.fish youtube-dl youtube-dl.exe youtube-dl.tar.gz diff --git a/MANIFEST.in b/MANIFEST.in index d43cc1f3b..5743f605a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,5 +2,6 @@ include README.md include test/*.py include test/*.json include youtube-dl.bash-completion +include youtube-dl.fish include youtube-dl.1 recursive-include docs Makefile conf.py *.rst diff --git a/Makefile b/Makefile index 088a9320b..6272b826c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion +all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.fish cleanall: clean rm -f youtube-dl youtube-dl.exe @@ -29,6 +29,8 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1 install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl + install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions + install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish test: #nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test @@ -36,9 +38,9 @@ test: tar: youtube-dl.tar.gz -.PHONY: all clean install test tar bash-completion pypi-files +.PHONY: all clean install test tar bash-completion pypi-files fish-completion -pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 +pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish youtube-dl: youtube_dl/*.py youtube_dl/*/*.py zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py @@ -64,7 +66,12 @@ youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-co bash-completion: youtube-dl.bash-completion -youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion +youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in + python devscripts/fish-completion.py + +fish-completion: youtube-dl.fish + +youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ --exclude '*.DS_Store' \ --exclude '*.kate-swp' \ @@ -78,5 +85,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- -- \ bin devscripts test youtube_dl docs \ LICENSE README.md README.txt \ - Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \ + Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ + youtube-dl.fish setup.py \ youtube-dl diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in new file mode 100644 index 000000000..eb79765da --- /dev/null +++ b/devscripts/fish-completion.in @@ -0,0 +1,5 @@ + +{{commands}} + + +complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py new file mode 100755 index 000000000..f4aaf0201 --- /dev/null +++ b/devscripts/fish-completion.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import optparse +import os +from os.path import dirname as dirn +import sys + +sys.path.append(dirn(dirn((os.path.abspath(__file__))))) +import youtube_dl +from youtube_dl.utils import shell_quote + +FISH_COMPLETION_FILE = 'youtube-dl.fish' +FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in' + +EXTRA_ARGS = { + 'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'], + + # Options that need a file parameter + 'download-archive': ['--require-parameter'], + 'cookies': ['--require-parameter'], + 'load-info': ['--require-parameter'], + 'batch-file': ['--require-parameter'], +} + +def build_completion(opt_parser): + commands = [] + + for group in opt_parser.option_groups: + for option in group.option_list: + long_option = option.get_opt_string().strip('-') + help_msg = shell_quote([option.help]) + complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option] + if option._short_opts: + complete_cmd += ['--short-option', option._short_opts[0].strip('-')] + if option.help != optparse.SUPPRESS_HELP: + complete_cmd += ['--description', option.help] + complete_cmd.extend(EXTRA_ARGS.get(long_option, [])) + commands.append(shell_quote(complete_cmd)) + + with open(FISH_COMPLETION_TEMPLATE) as f: + template = f.read() + filled_template = template.replace('{{commands}}', '\n'.join(commands)) + with open(FISH_COMPLETION_FILE, 'w') as f: + f.write(filled_template) + +parser = youtube_dl.parseOpts()[0] +build_completion(parser) diff --git a/setup.py b/setup.py index 03e7b358e..cf6b92b0f 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': else: files_spec = [ ('etc/bash_completion.d', ['youtube-dl.bash-completion']), + ('etc/fish/completions', ['youtube-dl.fish']), ('share/doc/youtube_dl', ['README.txt']), ('share/man/man1', ['youtube-dl.1']) ] -- cgit v1.2.3 From b04c8f735805ea2671429ac8d683c2887a6b4db8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 14 Sep 2014 16:48:15 +0200 Subject: release 2014.09.14.3 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1c9f978af..f32e792ad 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.14.2' +__version__ = '2014.09.14.3' -- cgit v1.2.3 From 98703c7fbfcf06348220aa63f9422cdd792cfe1a Mon Sep 17 00:00:00 2001 From: Haricharan Padmanaban Date: Sun, 14 Sep 2014 23:14:00 -0500 Subject: Einthusan Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/einthusan.py | 54 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/einthusan.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index de6e8ee30..7f0736ee8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -77,6 +77,7 @@ from .dropbox import DropboxIE from .ebaumsworld import EbaumsWorldIE from .ehow import EHowIE from .eighttracks import EightTracksIE +from .einthusan import EinthusanIE from .eitb import EitbIE from .ellentv import ( EllenTVIE, diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py new file mode 100644 index 000000000..712368faf --- /dev/null +++ b/youtube_dl/extractor/einthusan.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class EinthusanIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?einthusan\.com/movies/watch.php\?(.*)?id=(?P[0-9]+).*?' + _TESTS = [ + { + 'url': 'http://www.einthusan.com/movies/watch.php?hindimoviesonline=Ek+Villain&lang=hindi&id=2447', + 'md5': 'af244f4458cd667205e513d75da5b8b1', + 'info_dict': { + 'id': '2447', + 'ext': 'mp4', + 'title': 'Ek Villain', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, + { + 'url': 'http://www.einthusan.com/movies/watch.php?id=1671', + 'md5': 'ef63c7a803e22315880ed182c10d1c5c', + 'info_dict': { + 'id': '1671', + 'ext': 'mp4', + 'title': 'Soodhu Kavvuum', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + video_title = self._html_search_regex(r'''

(.*?)

''', webpage, 'title') + + video_url = self._html_search_regex( + r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video url') + + thumb_rel_url = self._html_search_regex( + r'''''', webpage, "thumbnail url") + thumb_abs_url = re.sub('\.\.', 'http://www.einthusan.com', thumb_rel_url) + + return { + 'id': video_id, + 'ext': 'mp4', + 'title': video_title, + 'url': video_url, + 'thumbnail': thumb_abs_url, + } -- cgit v1.2.3 From 59d284c3161797e31d7ca36da71464696adb8620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Sep 2014 19:19:37 +0700 Subject: [vporn] Make video URL regex more strict There is a garbage instead of proper URL for some HD videos --- youtube_dl/extractor/vporn.py | 60 +++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 426369c51..2d23effcc 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -11,22 +11,48 @@ from ..utils import ( class VpornIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P[^/]+)/(?P\d+)' - _TEST = { - 'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/', - 'md5': 'facf37c1b86546fa0208058546842c55', - 'info_dict': { - 'id': '497944', - 'display_id': 'violet-on-her-th-birthday', - 'ext': 'mp4', - 'title': 'Violet on her 19th birthday', - 'description': 'Violet dances in front of the camera which is sure to get you horny.', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'kileyGrope', - 'categories': ['Masturbation', 'Teen'], - 'duration': 393, - 'age_limit': 18, - } - } + _TESTS = [ + { + 'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/', + 'md5': 'facf37c1b86546fa0208058546842c55', + 'info_dict': { + 'id': '497944', + 'display_id': 'violet-on-her-th-birthday', + 'ext': 'mp4', + 'title': 'Violet on her 19th birthday', + 'description': 'Violet dances in front of the camera which is sure to get you horny.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'kileyGrope', + 'categories': ['Masturbation', 'Teen'], + 'duration': 393, + 'age_limit': 18, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + } + }, + { + 'url': 'http://www.vporn.com/female/hana-shower/523564/', + 'md5': 'ced35a4656198a1664cf2cda1575a25f', + 'info_dict': { + 'id': '523564', + 'display_id': 'hana-shower', + 'ext': 'mp4', + 'title': 'Hana Shower', + 'description': 'Hana showers at the bathroom.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'Hmmmmm', + 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'], + 'duration': 588, + 'age_limit': 18, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + } + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -64,7 +90,7 @@ class VpornIE(InfoExtractor): formats = [] - for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage): + for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage): video_url = video[1] fmt = { 'url': video_url, -- cgit v1.2.3 From fdea3abdf8242eb72604353c044a06a4d3f0f753 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Mon, 15 Sep 2014 16:08:20 +0300 Subject: [turbo] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/turbo.py | 67 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/turbo.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 48683ebcc..7af0cfd8f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -365,6 +365,7 @@ from .trutube import TruTubeIE from .tube8 import Tube8IE from .tudou import TudouIE from .tumblr import TumblrIE +from .turbo import TurboIE from .tutv import TutvIE from .tvigle import TvigleIE from .tvp import TvpIE diff --git a/youtube_dl/extractor/turbo.py b/youtube_dl/extractor/turbo.py new file mode 100644 index 000000000..29703a8a9 --- /dev/null +++ b/youtube_dl/extractor/turbo.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + xpath_text, +) + + +class TurboIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P[0-9]+)-' + _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}' + _TEST = { + 'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html', + 'md5': '33f4b91099b36b5d5a91f84b5bcba600', + 'info_dict': { + 'id': '454443', + 'ext': 'mp4', + 'duration': 3715, + 'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ', + 'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + playlist = self._download_xml(self._API_URL.format(video_id), video_id) + item = playlist.find('./channel/item') + if item is None: + raise ExtractorError('Playlist item was not found', expected=True) + + title = xpath_text(item, './title', 'title') + duration = int_or_none(xpath_text(item, './durate', 'duration')) + thumbnail = xpath_text(item, './visuel_clip', 'thumbnail') + description = self._og_search_description(webpage) + + formats = [] + get_quality = qualities(['3g', 'sd', 'hq']) + for child in item: + m = re.search(r'url_video_(?P.+)', child.tag) + if m: + quality = m.group('quality') + formats.append({ + 'format_id': quality, + 'url': child.text, + 'quality': get_quality(quality), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'duration': duration, + 'thumbnail': thumbnail, + 'description': description, + 'formats': formats, + } -- cgit v1.2.3 From 6d1f2431bd3ac1bd7d59c01747c190f42656f383 Mon Sep 17 00:00:00 2001 From: 5moufl Date: Mon, 15 Sep 2014 15:09:17 +0200 Subject: [BehindKink] Minor fixes - fix _VALID_URL regex - remove unnecessary variable - remove second call of report_extraction --- youtube_dl/extractor/behindkink.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py index f0a86fda3..12f13aae9 100644 --- a/youtube_dl/extractor/behindkink.py +++ b/youtube_dl/extractor/behindkink.py @@ -8,7 +8,7 @@ from ..utils import url_basename class BehindKinkIE(InfoExtractor): - _VALID_URL = r'(?:http://)(?:www\.)?behindkink\.com/(?P[0-9]{4})/(?P[0-9]{2})/(?P[0-9]{2})/(?P[^/?_]+)' + _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P[0-9]{4})/(?P[0-9]{2})/(?P[0-9]{2})/(?P[^/#?_]+)' _TEST = { 'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/', 'md5': '41ad01222b8442089a55528fec43ec01', @@ -31,8 +31,7 @@ class BehindKinkIE(InfoExtractor): day = mobj.group('day') upload_date = year + month + day - webpage_url = 'http://www.behindkink.com/' + year + '/' + month + '/' + day + '/' + display_id - webpage = self._download_webpage(webpage_url, display_id) + webpage = self._download_webpage(url, display_id) self.report_extraction(display_id) video_url = self._search_regex( @@ -41,7 +40,6 @@ class BehindKinkIE(InfoExtractor): video_id = url_basename(video_url) video_id = video_id.split('_')[0] - self.report_extraction(video_id) return { 'id': video_id, -- cgit v1.2.3 From a020a0dc20ced6468ec46214c394f6f360735b1d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 15 Sep 2014 15:10:24 +0200 Subject: [facebook] Fix support for untitled videos (Fixes #3757) --- test/test_utils.py | 8 ++++++++ youtube_dl/extractor/facebook.py | 13 ++++++++++--- youtube_dl/utils.py | 10 ++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8d8997977..70050d2b8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -40,6 +40,7 @@ from youtube_dl.utils import ( parse_iso8601, strip_jsonp, uppercase_escape, + limit_length, ) @@ -286,5 +287,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') + def test_limit_length(self): + self.assertEqual(limit_length(None, 12), None) + self.assertEqual(limit_length('foo', 12), 'foo') + self.assertTrue( + limit_length('foo bar baz asd', 12).startswith('foo bar')) + self.assertTrue('...' in limit_length('foo bar baz asd', 12)) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index d675a939d..60e68d98a 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -12,8 +12,8 @@ from ..utils import ( compat_urllib_parse, compat_urllib_request, urlencode_postdata, - ExtractorError, + limit_length, ) @@ -37,6 +37,14 @@ class FacebookIE(InfoExtractor): 'duration': 38, 'title': 'Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam fin...', } + }, { + 'note': 'Video without discernible title', + 'url': 'https://www.facebook.com/video.php?v=274175099429670', + 'info_dict': { + 'id': '274175099429670', + 'ext': 'mp4', + 'title': 'Facebook video #274175099429670', + } }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -131,8 +139,7 @@ class FacebookIE(InfoExtractor): video_title = self._html_search_regex( r'(?s)(.*?)', webpage, 'alternative title', default=None) - if len(video_title) > 80 + 3: - video_title = video_title[:80] + '...' + video_title = limit_length(video_title, 80) if not video_title: video_title = 'Facebook video #%s' % video_id diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 247788078..3ac0f1f54 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1571,3 +1571,13 @@ except AttributeError: if ret: raise subprocess.CalledProcessError(ret, p.args, output=output) return output + + +def limit_length(s, length): + """ Add ellipses to overly long strings """ + if s is None: + return None + ELLIPSES = '...' + if len(s) > length: + return s[:length - len(ELLIPSES)] + ELLIPSES + return s -- cgit v1.2.3 From ca0e7a2b17387c552c41afe705016b98d6677f51 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 15 Sep 2014 15:11:22 +0200 Subject: release 2014.09.15 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f32e792ad..97a5dcacf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.14.3' +__version__ = '2014.09.15' -- cgit v1.2.3 From e5a79071a5023940fb0c39d3b04f8cc6a913dfee Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 15 Sep 2014 15:25:35 +0200 Subject: [daum] Support non-numeric video IDs (Fixes #3749) --- youtube_dl/extractor/daum.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 6033cd94a..45d66e2e6 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -11,10 +11,10 @@ from ..utils import ( class DaumIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P\d+)' + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P[^?#&]+)' IE_NAME = 'daum.net' - _TEST = { + _TESTS = [{ 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', 'info_dict': { 'id': '52554690', @@ -24,11 +24,17 @@ class DaumIE(InfoExtractor): 'upload_date': '20130831', 'duration': 3868, }, - } + }, { + 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz', + 'only_matching': True, + }, { + 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = mobj.group('id') canonical_url = 'http://tvpot.daum.net/v/%s' % video_id webpage = self._download_webpage(canonical_url, video_id) full_id = self._search_regex( @@ -42,7 +48,6 @@ class DaumIE(InfoExtractor): 'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, video_id, 'Downloading video formats info') - self.to_screen(u'%s: Getting video urls' % video_id) formats = [] for format_el in urls.findall('result/output_list/output_list'): profile = format_el.attrib['profile'] @@ -52,7 +57,7 @@ class DaumIE(InfoExtractor): }) url_doc = self._download_xml( 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query, - video_id, note=False) + video_id, note='Downloading video data for %s format' % profile) format_url = url_doc.find('result/url').text formats.append({ 'url': format_url, -- cgit v1.2.3 From 21f2927f707dc1dfe9182a290571da1714f1ed63 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 15 Sep 2014 15:28:41 +0200 Subject: release 2014.09.15.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 97a5dcacf..cf0d862da 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.15' +__version__ = '2014.09.15.1' -- cgit v1.2.3 From d0e8b3d59bbb0951a4bf7e85baca8c40d3824dd6 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Mon, 15 Sep 2014 16:59:03 +0300 Subject: [nosvideo] Make more robust against missing metadata --- youtube_dl/extractor/nosvideo.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dl/extractor/nosvideo.py index 8c2c428fc..f3be8f552 100644 --- a/youtube_dl/extractor/nosvideo.py +++ b/youtube_dl/extractor/nosvideo.py @@ -8,11 +8,11 @@ from ..utils import ( ExtractorError, compat_urllib_request, urlencode_postdata, + xpath_text, xpath_with_ns, ) _x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'}) -_find = lambda el, p: el.find(_x(p)).text.strip() class NosVideoIE(InfoExtractor): @@ -53,9 +53,15 @@ class NosVideoIE(InfoExtractor): playlist = self._download_xml(playlist_url, video_id) track = playlist.find(_x('.//xspf:track')) - title = _find(track, './xspf:title') - url = _find(track, './xspf:file') - thumbnail = _find(track, './xspf:image') + if track is None: + raise ExtractorError( + 'XML playlist is missing the \'track\' element', + expected=True) + title = xpath_text(track, _x('./xspf:title'), 'title') + url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True) + thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail') + if title is not None: + title = title.strip() formats = [{ 'format_id': 'sd', -- cgit v1.2.3 From 5419033935e75245756e984a2ef6ee3b6cb6831e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 15 Sep 2014 23:27:18 +0200 Subject: Fixed tests --- youtube_dl/extractor/einthusan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 712368faf..97c5d7b72 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -10,7 +10,7 @@ class EinthusanIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?einthusan\.com/movies/watch.php\?(.*)?id=(?P[0-9]+).*?' _TESTS = [ { - 'url': 'http://www.einthusan.com/movies/watch.php?hindimoviesonline=Ek+Villain&lang=hindi&id=2447', + 'url': 'http://www.einthusan.com/movies/watch.php?id=2447', 'md5': 'af244f4458cd667205e513d75da5b8b1', 'info_dict': { 'id': '2447', -- cgit v1.2.3 From e2037b3f7d610beefa1bb3590a4552477035f696 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 15 Sep 2014 23:33:47 +0200 Subject: [einthusan] Add description and beautify --- youtube_dl/extractor/einthusan.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 97c5d7b72..5dfea0d39 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class EinthusanIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?einthusan\.com/movies/watch.php\?(.*)?id=(?P[0-9]+).*?' + _VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P[0-9]+)' _TESTS = [ { 'url': 'http://www.einthusan.com/movies/watch.php?id=2447', @@ -17,6 +17,7 @@ class EinthusanIE(InfoExtractor): 'ext': 'mp4', 'title': 'Ek Villain', 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:9d29fc91a7abadd4591fb862fa560d93', } }, { @@ -27,6 +28,7 @@ class EinthusanIE(InfoExtractor): 'ext': 'mp4', 'title': 'Soodhu Kavvuum', 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51', } }, ] @@ -36,19 +38,24 @@ class EinthusanIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - video_title = self._html_search_regex(r'''

(.*?)

''', webpage, 'title') + video_title = self._html_search_regex( + r'

(.*?)

', webpage, 'title') video_url = self._html_search_regex( - r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video url') + r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''', + webpage, 'video url') - thumb_rel_url = self._html_search_regex( - r'''''', webpage, "thumbnail url") - thumb_abs_url = re.sub('\.\.', 'http://www.einthusan.com', thumb_rel_url) + description = self._html_search_meta('description', webpage) + thumbnail = self._html_search_regex( + r'''''', + webpage, "thumbnail url", fatal=False) + if thumbnail is not None: + thumbnail = thumbnail.replace('..', 'http://www.einthusan.com') return { 'id': video_id, - 'ext': 'mp4', 'title': video_title, 'url': video_url, - 'thumbnail': thumb_abs_url, + 'thumbnail': thumbnail, + 'description': description, } -- cgit v1.2.3 From ae369738b0dfee0f62ee26102c154750f599aea5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 15 Sep 2014 23:35:00 +0200 Subject: Credit @haricharan for einthusan (#3755) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 42d0a0180..871ddead9 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -75,6 +75,7 @@ __authors__ = ( 'Ole Ernst', 'Aaron McDaniel (mcd1992)', 'Magnus Kolstad', + 'Hari Padmanaban', ) __license__ = 'Public Domain' -- cgit v1.2.3 From 37a81dff047c40b2a8fb163b94bc86ebe166db73 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 15 Sep 2014 23:37:22 +0200 Subject: [behindkink] Remove call to report_extraction --- youtube_dl/extractor/behindkink.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py index 12f13aae9..31fdc0dcc 100644 --- a/youtube_dl/extractor/behindkink.py +++ b/youtube_dl/extractor/behindkink.py @@ -33,7 +33,6 @@ class BehindKinkIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - self.report_extraction(display_id) video_url = self._search_regex( r"'file':\s*'([^']+)'", webpage, 'URL base') -- cgit v1.2.3 From 7bb5df1cdae27140319741144e188ae11c378aca Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 16 Sep 2014 10:08:34 +0200 Subject: [nhl] Match videos without catid (Fixes #3764) --- youtube_dl/extractor/nhl.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index ceda1dcc0..072d9cf8e 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -46,9 +46,9 @@ class NHLBaseInfoExtractor(InfoExtractor): class NHLIE(NHLBaseInfoExtractor): IE_NAME = 'nhl.com' - _VALID_URL = r'https?://video(?P\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P[0-9]+)' + _VALID_URL = r'https?://video(?P\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', 'info_dict': { 'id': '453614', @@ -58,7 +58,10 @@ class NHLIE(NHLBaseInfoExtractor): 'duration': 18, 'upload_date': '20131006', }, - } + }, { + 'url': 'http://video.flames.nhl.com/videocenter/console?id=630616', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) -- cgit v1.2.3 From ed86ee3b4adf0dca1b40a382ca0ade1feb798851 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 16 Sep 2014 10:09:02 +0200 Subject: release 2014.09.16 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cf0d862da..a59cbf879 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.15.1' +__version__ = '2014.09.16' -- cgit v1.2.3 From dc9f3568461a26e1306416ef5ea1ecdf4e9420b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Sep 2014 18:47:06 +0700 Subject: [ard] Remove URL path escaping This is now implemented generic in YoutubeDL.urlopen --- youtube_dl/extractor/ard.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 12457f0f9..54cec1c2f 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -51,9 +51,6 @@ class ARDMediathekIE(InfoExtractor): else: video_id = m.group('video_id') - urlp = compat_urllib_parse_urlparse(url) - url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl() - webpage = self._download_webpage(url, video_id) if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage: -- cgit v1.2.3 From 64892c0b79b401ab487c8facb5a646011873194c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Sep 2014 23:47:59 +0700 Subject: [francetv] Adapt to new API (Closes #3751, closes #3769) --- youtube_dl/extractor/francetv.py | 204 +++++++++++++++++++++------------------ 1 file changed, 108 insertions(+), 96 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 1b0e8e5d5..0b3374d97 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -8,45 +8,68 @@ import json from .common import InfoExtractor from ..utils import ( compat_urlparse, + ExtractorError, + clean_html, + parse_duration, + compat_urllib_parse_urlparse, + int_or_none, ) class FranceTVBaseInfoExtractor(InfoExtractor): - def _extract_video(self, video_id): - info = self._download_xml( - 'http://www.francetvinfo.fr/appftv/webservices/video/' - 'getInfosOeuvre.php?id-diffusion=' - + video_id, video_id, 'Downloading XML config') - - manifest_url = info.find('videos/video/url').text - manifest_url = manifest_url.replace('/z/', '/i/') - - if manifest_url.startswith('rtmp'): - formats = [{'url': manifest_url, 'ext': 'flv'}] - else: - formats = [] - available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats') - for index, format_descr in enumerate(available_formats.split(',')): - format_info = { - 'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index), - 'ext': 'mp4', - } - m_resolution = re.search(r'(?P\d+)x(?P\d+)', format_descr) - if m_resolution is not None: - format_info.update({ - 'width': int(m_resolution.group('width')), - 'height': int(m_resolution.group('height')), - }) - formats.append(format_info) - - thumbnail_path = info.find('image').text + def _extract_video(self, video_id, catalogue): + info = self._download_json( + 'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s' + % (video_id, catalogue), + video_id, 'Downloading video JSON') + + if info.get('status') == 'NOK': + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True) + + formats = [] + for video in info['videos']: + if video['statut'] != 'ONLINE': + continue + video_url = video['url'] + if not video_url: + continue + format_id = video['format'] + if video_url.endswith('.f4m'): + video_url_parsed = compat_urllib_parse_urlparse(video_url) + f4m_url = self._download_webpage( + 'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path, + video_id, 'Downloading f4m manifest token', fatal=False) + if f4m_url: + f4m_formats = self._extract_f4m_formats(f4m_url, video_id) + for f4m_format in f4m_formats: + f4m_format['preference'] = 1 + formats.extend(f4m_formats) + elif video_url.endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats(video_url, video_id)) + elif video_url.startswith('rtmp'): + formats.append({ + 'url': video_url, + 'format_id': 'rtmp-%s' % format_id, + 'ext': 'flv', + 'preference': 1, + }) + else: + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'preference': 2, + }) + self._sort_formats(formats) return { 'id': video_id, - 'title': info.find('titre').text, + 'title': info['titre'], + 'description': clean_html(info['synopsis']), + 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), + 'duration': parse_duration(info['duree']), + 'timestamp': int_or_none(info['diffusion']['timestamp']), 'formats': formats, - 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), - 'description': info.find('synopsis').text, } @@ -61,7 +84,7 @@ class PluzzIE(FranceTVBaseInfoExtractor): webpage = self._download_webpage(url, title) video_id = self._search_regex( r'data-diffusion="(\d+)"', webpage, 'ID') - return self._extract_video(video_id) + return self._extract_video(video_id, 'Pluzz') class FranceTvInfoIE(FranceTVBaseInfoExtractor): @@ -70,13 +93,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', + 'md5': '9cecf35f99c4079c199e9817882a9a1c', 'info_dict': { 'id': '84981923', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Soir 3', - }, - 'params': { - 'skip_download': True, + 'upload_date': '20130826', + 'timestamp': 1377548400, }, }, { 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', @@ -88,15 +111,17 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): }, 'params': { 'skip_download': 'HLS (reqires ffmpeg)' - } + }, + 'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.', }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) page_title = mobj.group('title') webpage = self._download_webpage(url, page_title) - video_id = self._search_regex(r'id-video=((?:[^0-9]*?_)?[0-9]+)[@"]', webpage, 'video id') - return self._extract_video(video_id) + video_id, catalogue = self._search_regex( + r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@') + return self._extract_video(video_id, catalogue) class FranceTVIE(FranceTVBaseInfoExtractor): @@ -112,91 +137,77 @@ class FranceTVIE(FranceTVBaseInfoExtractor): # france2 { 'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', - 'file': '75540104.mp4', + 'md5': 'c03fc87cb85429ffd55df32b9fc05523', 'info_dict': { - 'title': '13h15, le samedi...', - 'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d', - }, - 'params': { - # m3u8 download - 'skip_download': True, + 'id': '109169362', + 'ext': 'flv', + 'title': '13h15, le dimanche...', + 'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7', + 'upload_date': '20140914', + 'timestamp': 1410693600, }, }, # france3 { 'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575', + 'md5': '679bb8f8921f8623bd658fa2f8364da0', 'info_dict': { 'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Le scandale du prix des médicaments', 'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'upload_date': '20131113', + 'timestamp': 1384380000, }, }, # france4 { 'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', + 'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c', 'info_dict': { 'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Hero Corp Making of - Extrait 1', 'description': 'md5:c87d54871b1790679aec1197e73d650a', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'upload_date': '20131106', + 'timestamp': 1383766500, }, }, # france5 { 'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968', + 'md5': '78f0f4064f9074438e660785bbf2c5d9', 'info_dict': { - 'id': '92837968', - 'ext': 'mp4', + 'id': '108961659', + 'ext': 'flv', 'title': 'C à dire ?!', - 'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f', - }, - 'params': { - # m3u8 download - 'skip_download': True, + 'description': 'md5:1a4aeab476eb657bf57c4ff122129f81', + 'upload_date': '20140915', + 'timestamp': 1410795000, }, }, # franceo { 'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013', + 'md5': '52f0bfe202848b15915a2f39aaa8981b', 'info_dict': { - 'id': '92327925', - 'ext': 'mp4', - 'title': 'Infô-Afrique', + 'id': '108634970', + 'ext': 'flv', + 'title': 'Infô Afrique', 'description': 'md5:ebf346da789428841bee0fd2a935ea55', + 'upload_date': '20140915', + 'timestamp': 1410822000, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': 'The id changes frequently', }, ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - if mobj.group('key'): - webpage = self._download_webpage(url, mobj.group('key')) - id_res = [ - (r'''(?x)\s* - '''), - (r'.*?)(\?|$)' _TEST = { - 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', + 'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553', + 'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6', 'info_dict': { - 'id': 'EV_6785', - 'ext': 'mp4', - 'title': 'Einstein on the beach au Théâtre du Châtelet', - 'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb', - }, - 'params': { - # m3u8 download - 'skip_download': True, + 'id': 'EV_22853', + 'ext': 'flv', + 'title': 'Dans les jardins de William Christie - Le Camus', + 'description': 'md5:4710c82315c40f0c865ca8b9a68b5299', + 'upload_date': '20140829', + 'timestamp': 1409317200, }, } @@ -249,5 +259,7 @@ class CultureboxIE(FranceTVBaseInfoExtractor): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') webpage = self._download_webpage(url, name) - video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id') - return self._extract_video(video_id) + video_id, catalogue = self._search_regex( + r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') + + return self._extract_video(video_id, catalogue) -- cgit v1.2.3 From 38349518f1292b665905b0c2dc30d33021aaa8cb Mon Sep 17 00:00:00 2001 From: Carlos Ramos Date: Tue, 16 Sep 2014 20:48:53 +0200 Subject: Added new host: allmyvideos.net --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/allmyvideos.py | 51 +++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/allmyvideos.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e9fceae4c..f715c3310 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,6 +6,7 @@ from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .anysex import AnySexIE from .aol import AolIE +from .allmyvideos import AllmyvideosIE from .allocine import AllocineIE from .aparat import AparatIE from .appletrailers import AppleTrailersIE diff --git a/youtube_dl/extractor/allmyvideos.py b/youtube_dl/extractor/allmyvideos.py new file mode 100644 index 000000000..4cb559483 --- /dev/null +++ b/youtube_dl/extractor/allmyvideos.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urllib_request, +) + + +class AllmyvideosIE(InfoExtractor): + IE_NAME = 'allmyvideos.net' + _VALID_URL = r'https?://allmyvideos\.net/(?P[a-zA-Z0-9_-]+)' + + _TEST = { + 'url': 'http://allmyvideos.net/jih3nce3x6wn', + 'md5': '8f26c1e7102556a0d7f24306d32c2092', + 'info_dict': { + 'id': 'jih3nce3x6wn', + 'ext': 'mp4', + 'title': 'youtube-dl test video', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + orig_webpage = self._download_webpage(url, video_id) + fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) + data = {} + for name, value in fields: + data[name] = value + + post = compat_urllib_parse.urlencode(data) + headers = { + b'Content-Type': b'application/x-www-form-urlencoded', + } + req = compat_urllib_request.Request(url, post, headers) + webpage = self._download_webpage(req, video_id, note='Downloading video page ...') + + #Could be several links with different quality + links = re.findall(r'"file" : "?(.+?)",', webpage) + + return { + 'id': video_id, + 'title': data['fname'][:len(data['fname'])-4], #Remove .mp4 extension + 'url': links[len(links)-1] #Choose the higher quality link + } \ No newline at end of file -- cgit v1.2.3 From 7cdd5339b3a02b7429c945ae4cd1e70c7112d555 Mon Sep 17 00:00:00 2001 From: Carlos Ramos Date: Tue, 16 Sep 2014 21:05:50 +0200 Subject: Change tabs to spaces --- youtube_dl/extractor/allmyvideos.py | 48 ++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/allmyvideos.py b/youtube_dl/extractor/allmyvideos.py index 4cb559483..7764d4a14 100644 --- a/youtube_dl/extractor/allmyvideos.py +++ b/youtube_dl/extractor/allmyvideos.py @@ -25,27 +25,27 @@ class AllmyvideosIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - orig_webpage = self._download_webpage(url, video_id) - fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) - data = {} - for name, value in fields: - data[name] = value - - post = compat_urllib_parse.urlencode(data) - headers = { - b'Content-Type': b'application/x-www-form-urlencoded', - } - req = compat_urllib_request.Request(url, post, headers) - webpage = self._download_webpage(req, video_id, note='Downloading video page ...') - - #Could be several links with different quality - links = re.findall(r'"file" : "?(.+?)",', webpage) - - return { - 'id': video_id, - 'title': data['fname'][:len(data['fname'])-4], #Remove .mp4 extension - 'url': links[len(links)-1] #Choose the higher quality link - } \ No newline at end of file + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + orig_webpage = self._download_webpage(url, video_id) + fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) + data = {} + for name, value in fields: + data[name] = value + + post = compat_urllib_parse.urlencode(data) + headers = { + b'Content-Type': b'application/x-www-form-urlencoded', + } + req = compat_urllib_request.Request(url, post, headers) + webpage = self._download_webpage(req, video_id, note='Downloading video page ...') + + #Could be several links with different quality + links = re.findall(r'"file" : "?(.+?)",', webpage) + + return { + 'id': video_id, + 'title': data['fname'][:len(data['fname'])-4], #Remove .mp4 extension + 'url': links[len(links)-1] #Choose the higher quality link + } \ No newline at end of file -- cgit v1.2.3 From a04aa7a9e692b174f4b03ddf6918c2d0a20ff7fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 16 Sep 2014 22:53:48 +0200 Subject: [generic] The url in the doesn't need to be enclosed in single quotes (fixes #3770) See the examples in https://en.wikipedia.org/wiki/Meta_refresh or the shortened urls from https://t.co/. --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2bfa20606..40eeaad16 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -877,7 +877,7 @@ class GenericIE(InfoExtractor): if not found: found = re.search( r'(?i) Date: Tue, 16 Sep 2014 22:56:31 +0200 Subject: [allmyvideos] Support multiple formats --- youtube_dl/extractor/allmyvideos.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/allmyvideos.py b/youtube_dl/extractor/allmyvideos.py index 7764d4a14..e6c60e7e4 100644 --- a/youtube_dl/extractor/allmyvideos.py +++ b/youtube_dl/extractor/allmyvideos.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import os.path import re from .common import InfoExtractor @@ -16,7 +17,7 @@ class AllmyvideosIE(InfoExtractor): _TEST = { 'url': 'http://allmyvideos.net/jih3nce3x6wn', - 'md5': '8f26c1e7102556a0d7f24306d32c2092', + 'md5': '710883dee1bfc370ecf9fa6a89307c88', 'info_dict': { 'id': 'jih3nce3x6wn', 'ext': 'mp4', @@ -30,22 +31,29 @@ class AllmyvideosIE(InfoExtractor): orig_webpage = self._download_webpage(url, video_id) fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) - data = {} - for name, value in fields: - data[name] = value + data = dict(fields) post = compat_urllib_parse.urlencode(data) headers = { b'Content-Type': b'application/x-www-form-urlencoded', } req = compat_urllib_request.Request(url, post, headers) - webpage = self._download_webpage(req, video_id, note='Downloading video page ...') + webpage = self._download_webpage( + req, video_id, note='Downloading video page ...') + + title = os.path.splitext(data['fname'])[0] #Could be several links with different quality links = re.findall(r'"file" : "?(.+?)",', webpage) + # Assume the links are ordered in quality + formats = [{ + 'url': l, + 'quality': i, + } for i, l in enumerate(links)] + self._sort_formats(formats) return { 'id': video_id, - 'title': data['fname'][:len(data['fname'])-4], #Remove .mp4 extension - 'url': links[len(links)-1] #Choose the higher quality link - } \ No newline at end of file + 'title': title, + 'formats': formats, + } -- cgit v1.2.3 From c001f939e45271b63fd4182de647142834b8d367 Mon Sep 17 00:00:00 2001 From: Carlos Ramos Date: Tue, 16 Sep 2014 23:23:54 +0200 Subject: [Allmyvideos] Fixed md5. Only 10KiB of the video file --- youtube_dl/extractor/allmyvideos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/allmyvideos.py b/youtube_dl/extractor/allmyvideos.py index 7764d4a14..a5ebdfdf4 100644 --- a/youtube_dl/extractor/allmyvideos.py +++ b/youtube_dl/extractor/allmyvideos.py @@ -16,7 +16,7 @@ class AllmyvideosIE(InfoExtractor): _TEST = { 'url': 'http://allmyvideos.net/jih3nce3x6wn', - 'md5': '8f26c1e7102556a0d7f24306d32c2092', + 'md5': '710883dee1bfc370ecf9fa6a89307c88', 'info_dict': { 'id': 'jih3nce3x6wn', 'ext': 'mp4', -- cgit v1.2.3 From 997987d568b49cb3720083d85f120ef634989ba9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 16 Sep 2014 23:33:13 +0200 Subject: Credit @r4mos for allmyvideos --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 871ddead9..a8d5095ae 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -76,6 +76,7 @@ __authors__ = ( 'Aaron McDaniel (mcd1992)', 'Magnus Kolstad', 'Hari Padmanaban', + 'Carlos Ramos', ) __license__ = 'Public Domain' -- cgit v1.2.3 From 6b6096d0b7c7e98ae2aefb306793ef58ee13c9f2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 16 Sep 2014 23:35:15 +0200 Subject: release 2014.09.16.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a59cbf879..23892a8bd 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.16' +__version__ = '2014.09.16.1' -- cgit v1.2.3 From 5a13fe9ed2abcd67b4e8469805267b1afa0fb2d8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 17 Sep 2014 12:50:36 +0200 Subject: Credit @m5moufl for behindkink (#3740) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a8d5095ae..20d7a57ce 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -77,6 +77,7 @@ __authors__ = ( 'Magnus Kolstad', 'Hari Padmanaban', 'Carlos Ramos', + '5moufl', ) __license__ = 'Public Domain' -- cgit v1.2.3 From 944a3de2781658c94d71d4bc4b12bac9b8b382c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Sep 2014 19:02:57 +0700 Subject: [npo] Improve formats extraction (Closes #3782) --- youtube_dl/extractor/npo.py | 77 +++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 902d62944..7a154e94a 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( unified_strdate, + parse_duration, qualities, ) @@ -13,17 +14,43 @@ class NPOIE(InfoExtractor): IE_NAME = 'npo.nl' _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P[^/?]+)' - _TEST = { - 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', - 'md5': '4b3f9c429157ec4775f2c9cb7b911016', - 'info_dict': { - 'id': 'VPWON_1220719', - 'ext': 'm4v', - 'title': 'Nieuwsuur', - 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', - 'upload_date': '20140622', + _TESTS = [ + { + 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', + 'md5': '4b3f9c429157ec4775f2c9cb7b911016', + 'info_dict': { + 'id': 'VPWON_1220719', + 'ext': 'm4v', + 'title': 'Nieuwsuur', + 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', + 'upload_date': '20140622', + }, }, - } + { + 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', + 'md5': 'da50a5787dbfc1603c4ad80f31c5120b', + 'info_dict': { + 'id': 'VARA_101191800', + 'ext': 'm4v', + 'title': 'De Mega Mike & Mega Thomas show', + 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', + 'upload_date': '20090227', + 'duration': 2400, + }, + }, + { + 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', + 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', + 'info_dict': { + 'id': 'VPWON_1169289', + 'ext': 'm4v', + 'title': 'Tegenlicht', + 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', + 'upload_date': '20130225', + 'duration': 3000, + }, + } + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -43,19 +70,28 @@ class NPOIE(InfoExtractor): token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token') formats = [] - quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std']) + quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) for format_id in metadata['pubopties']: - streams_info = self._download_json( + format_info = self._download_json( 'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token), - video_id, 'Downloading %s streams info' % format_id) - stream_info = self._download_json( - streams_info['streams'][0] + '&type=json', - video_id, 'Downloading %s stream info' % format_id) + video_id, 'Downloading %s JSON' % format_id) + if format_info.get('error_code', 0) or format_info.get('errorcode', 0): + continue + streams = format_info.get('streams') + if streams: + video_info = self._download_json( + streams[0] + '&type=json', + video_id, 'Downloading %s stream JSON' % format_id) + else: + video_info = format_info + video_url = video_info.get('url') + if not video_url: + continue if format_id == 'adaptive': - formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id)) + formats.extend(self._extract_m3u8_formats(video_url, video_id)) else: formats.append({ - 'url': stream_info['url'], + 'url': video_url, 'format_id': format_id, 'quality': quality(format_id), }) @@ -65,7 +101,8 @@ class NPOIE(InfoExtractor): 'id': video_id, 'title': metadata['titel'], 'description': metadata['info'], - 'thumbnail': metadata['images'][-1]['url'], - 'upload_date': unified_strdate(metadata['gidsdatum']), + 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], + 'upload_date': unified_strdate(metadata.get('gidsdatum')), + 'duration': parse_duration(metadata.get('tijdsduur')), 'formats': formats, } -- cgit v1.2.3 From 67abbe95273f59f4a04486172e6d422a10b6afb3 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Wed, 17 Sep 2014 22:57:01 +0300 Subject: [videomega] Add new extractor. Closes #3775 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/videomega.py | 59 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/videomega.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f715c3310..75831b40a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -393,6 +393,7 @@ from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE +from .videomega import VideoMegaIE from .videopremium import VideoPremiumIE from .videott import VideoTtIE from .videoweed import VideoWeedIE diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py new file mode 100644 index 000000000..1b6b65839 --- /dev/null +++ b/youtube_dl/extractor/videomega.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + remove_start, +) + + +class VideoMegaIE(InfoExtractor): + _VALID_URL = r'''(?x)https?:// + (?:www\.)?videomega\.tv/ + (?:iframe\.php)?\?ref=(?P[A-Za-z0-9]+) + ''' + _TEST = { + 'url': 'http://videomega.tv/?ref=GKeGPVedBe', + 'md5': '240fb5bcf9199961f48eb17839b084d6', + 'info_dict': { + 'id': 'GKeGPVedBe', + 'ext': 'mp4', + 'title': 'XXL - All Sports United', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id) + webpage = self._download_webpage(url, video_id) + + escaped_data = self._search_regex( + 'unescape\("([^"]+)"\)', webpage, 'escaped data') + playlist = compat_urllib_parse.unquote(escaped_data) + + thumbnail = self._search_regex( + r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False) + url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL') + title = self._html_search_regex( + r'(.*?)', webpage, 'title') + if title: + title = remove_start(title, 'VideoMega.tv - ') + + formats = [] + formats.append({ + 'format_id': 'sd', + 'url': url, + }) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + } -- cgit v1.2.3 From 0e59b9fffb12255a16577dca7710b7738feca75c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 18 Sep 2014 00:18:27 +0200 Subject: [videomega] Simplify (#3786) * Use raw strings (r'foo') for regular expressions (enables highlighting and avoids some errors). * title is always true-ish --- youtube_dl/extractor/videomega.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index 1b6b65839..29c4e0101 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -34,22 +34,20 @@ class VideoMegaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) escaped_data = self._search_regex( - 'unescape\("([^"]+)"\)', webpage, 'escaped data') + r'unescape\("([^"]+)"\)', webpage, 'escaped data') playlist = compat_urllib_parse.unquote(escaped_data) thumbnail = self._search_regex( r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False) url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL') - title = self._html_search_regex( - r'(.*?)', webpage, 'title') - if title: - title = remove_start(title, 'VideoMega.tv - ') + title = remove_start(self._html_search_regex( + r'(.*?)', webpage, 'title'), 'VideoMega.tv - ') - formats = [] - formats.append({ + formats = [{ 'format_id': 'sd', 'url': url, - }) + }] + self._sort_formats(formats) return { 'id': video_id, -- cgit v1.2.3 From 9296738f20c1335498a78c99a86767e9bae4f6d2 Mon Sep 17 00:00:00 2001 From: dequis Date: Thu, 18 Sep 2014 03:02:03 -0300 Subject: [soundcloud] Support api urls with secret_token, Closes #3707 --- youtube_dl/extractor/soundcloud.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index b78aed7f0..129f587ec 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -31,7 +31,8 @@ class SoundcloudIE(InfoExtractor): (?!sets/|likes/?(?:$|[?#])) (?P[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) - |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) + |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) + (?:/?\?secret_token=(?P<secret_token>[^&]+?))?$) |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) ) ''' @@ -80,6 +81,20 @@ class SoundcloudIE(InfoExtractor): 'duration': 9, }, }, + # private link (alt format) + { + 'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp', + 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', + 'info_dict': { + 'id': '123998367', + 'ext': 'mp3', + 'title': 'Youtube - Dl Test Video \'\' Ä↭', + 'uploader': 'jaimeMF', + 'description': 'test chars: \"\'/\\ä↭', + 'upload_date': '20131209', + 'duration': 9, + }, + }, # downloadable song { 'url': 'https://soundcloud.com/oddsamples/bus-brakes', @@ -197,6 +212,9 @@ class SoundcloudIE(InfoExtractor): if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id + token = mobj.group('secret_token') + if token: + info_json_url += "&secret_token=" + token elif mobj.group('player'): query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) return self.url_result(query['url'][0]) -- cgit v1.2.3 From 2f834e938192a61fd4a32fa98bffb5e1b614bc29 Mon Sep 17 00:00:00 2001 From: dequis <dx@dxzone.com.ar> Date: Thu, 18 Sep 2014 06:35:11 -0300 Subject: [soundcloud] Secret playlists and sets Closes #3707 again. No test cases because I don't know what urls to use that won't be turned into public eventually (as it happened with the first one in that ticket) --- youtube_dl/extractor/soundcloud.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 129f587ec..2bed3c350 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -238,7 +238,7 @@ class SoundcloudIE(InfoExtractor): class SoundcloudSetIE(SoundcloudIE): - _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' + _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' IE_NAME = 'soundcloud:set' _TESTS = [{ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', @@ -252,14 +252,19 @@ class SoundcloudSetIE(SoundcloudIE): mobj = re.match(self._VALID_URL, url) # extract uploader (which is in the url) - uploader = mobj.group(1) + uploader = mobj.group('uploader') # extract simple title (uploader + slug of song title) - slug_title = mobj.group(2) + slug_title = mobj.group('slug_title') full_title = '%s/sets/%s' % (uploader, slug_title) + url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) + + token = mobj.group('token') + if token: + full_title += '/' + token + url += '/' + token self.report_resolve(full_title) - url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) resolv_url = self._resolv_url(url) info = self._download_json(resolv_url, full_title) @@ -270,7 +275,7 @@ class SoundcloudSetIE(SoundcloudIE): return { '_type': 'playlist', - 'entries': [self._extract_info_dict(track) for track in info['tracks']], + 'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']], 'id': info['id'], 'title': info['title'], } @@ -333,7 +338,7 @@ class SoundcloudUserIE(SoundcloudIE): class SoundcloudPlaylistIE(SoundcloudIE): - _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)' + _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))$' IE_NAME = 'soundcloud:playlist' _TESTS = [ @@ -353,14 +358,21 @@ class SoundcloudPlaylistIE(SoundcloudIE): playlist_id = mobj.group('id') base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id) - data = compat_urllib_parse.urlencode({ + data_dict = { 'client_id': self._CLIENT_ID, - }) + } + token = mobj.group('token') + + if token: + data_dict['secret_token'] = token + + data = compat_urllib_parse.urlencode(data_dict) data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') entries = [ - self._extract_info_dict(t, quiet=True) for t in data['tracks']] + self._extract_info_dict(t, quiet=True, secret_token=token) + for t in data['tracks']] return { '_type': 'playlist', -- cgit v1.2.3 From 2914e5f00f6ebcc59712b7091a87988408ff3c88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 18 Sep 2014 20:56:54 +0700 Subject: [drtuber] Fix categories --- youtube_dl/extractor/drtuber.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index d5bfd7f22..ca274dff6 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -19,7 +19,7 @@ class DrTuberIE(InfoExtractor): 'like_count': int, 'dislike_count': int, 'comment_count': int, - 'categories': list, # NSFW + 'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, } @@ -52,9 +52,9 @@ class DrTuberIE(InfoExtractor): r'<span class="comments_count">([\d,\.]+)</span>', webpage, 'comment count', fatal=False)) - cats_str = self._html_search_regex( - r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False) - categories = None if cats_str is None else cats_str.split(' ') + cats_str = self._search_regex( + r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False) + categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str) return { 'id': video_id, -- cgit v1.2.3 From 109a540e7a4c5741fa77b68b4f346f42dc1cda97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 18 Sep 2014 16:57:34 +0200 Subject: [ign] Fix extraction --- youtube_dl/extractor/ign.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 9e8b69f57..ac7804ad9 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -71,6 +71,7 @@ class IGNIE(InfoExtractor): def _find_video_id(self, webpage): res_id = [ + r'"video_id"\s*:\s*"(.*?)"', r'data-video-id="(.+?)"', r'<object id="vid_(.+?)"', r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"', @@ -85,7 +86,7 @@ class IGNIE(InfoExtractor): webpage = self._download_webpage(url, name_or_id) if page_type != 'video': multiple_urls = re.findall( - '<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', + '<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', webpage) if multiple_urls: return [self.url_result(u, ie='IGN') for u in multiple_urls] -- cgit v1.2.3 From 09b23c902b5ab4a4ca9607128128d110a3c41875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 18 Sep 2014 17:02:53 +0200 Subject: [1up.com] Urls end now with '.html' --- youtube_dl/extractor/ign.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index ac7804ad9..12e9e61c4 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -112,13 +112,13 @@ class IGNIE(InfoExtractor): class OneUPIE(IGNIE): - _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)' + _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html' IE_NAME = '1up.com' _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' _TESTS = [{ - 'url': 'http://gamevideos.1up.com/video/id/34976', + 'url': 'http://gamevideos.1up.com/video/id/34976.html', 'md5': '68a54ce4ebc772e4b71e3123d413163d', 'info_dict': { 'id': '34976', -- cgit v1.2.3 From e2e5dae64da60c37af65c7cffd18475a30fcbad3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:40:19 +0200 Subject: Add -f m4a --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/options.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9519594c9..eaba40bf2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -708,7 +708,7 @@ class YoutubeDL(object): if video_formats: return video_formats[0] else: - extensions = ['mp4', 'flv', 'webm', '3gp'] + extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a'] if format_spec in extensions: filter_f = lambda f: f['ext'] == format_spec else: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 31baab469..7df20ae61 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -218,7 +218,7 @@ def parseOpts(overrideArguments=None): video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, - help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') + help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='all') video_format.add_option('--prefer-free-formats', -- cgit v1.2.3 From 1de33fafd94c7e0d4ccede711ef7f13bd3e2301b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:43:49 +0200 Subject: [YoutubeDL] Allow downloading multiple formats with , --- youtube_dl/YoutubeDL.py | 43 ++++++++++++++++++++++--------------------- youtube_dl/options.py | 2 +- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index eaba40bf2..a1713dc5a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -809,28 +809,29 @@ class YoutubeDL(object): if req_format in ('-1', 'all'): formats_to_download = formats else: - # We can accept formats requested in the format: 34/5/best, we pick - # the first that is available, starting from left - req_formats = req_format.split('/') - for rf in req_formats: - if re.match(r'.+?\+.+?', rf) is not None: - # Two formats have been requested like '137+139' - format_1, format_2 = rf.split('+') - formats_info = (self.select_format(format_1, formats), - self.select_format(format_2, formats)) - if all(formats_info): - selected_format = { - 'requested_formats': formats_info, - 'format': rf, - 'ext': formats_info[0]['ext'], - } + for rfstr in req_format.split(','): + # We can accept formats requested in the format: 34/5/best, we pick + # the first that is available, starting from left + req_formats = rfstr.split('/') + for rf in req_formats: + if re.match(r'.+?\+.+?', rf) is not None: + # Two formats have been requested like '137+139' + format_1, format_2 = rf.split('+') + formats_info = (self.select_format(format_1, formats), + self.select_format(format_2, formats)) + if all(formats_info): + selected_format = { + 'requested_formats': formats_info, + 'format': rf, + 'ext': formats_info[0]['ext'], + } + else: + selected_format = None else: - selected_format = None - else: - selected_format = self.select_format(rf, formats) - if selected_format is not None: - formats_to_download = [selected_format] - break + selected_format = self.select_format(rf, formats) + if selected_format is not None: + formats_to_download.append(selected_format) + break if not formats_to_download: raise ExtractorError('requested format not available', expected=True) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 7df20ae61..44dcb1e34 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -218,7 +218,7 @@ def parseOpts(overrideArguments=None): video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, - help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') + help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='all') video_format.add_option('--prefer-free-formats', -- cgit v1.2.3 From fd78a4d3e63f191e0774584d9b71bf25a2d8dbcf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:43:59 +0200 Subject: release 2014.09.18 --- README.md | 15 +++++++++------ youtube_dl/version.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5cc959ac5..5d15decb5 100644 --- a/README.md +++ b/README.md @@ -227,12 +227,15 @@ which means you can modify it, redistribute it or use it however you like. ## Video Format Options: -f, --format FORMAT video format code, specify the order of - preference using slashes: "-f 22/17/18". - "-f mp4" and "-f flv" are also supported. - You can also use the special names "best", - "bestvideo", "bestaudio", "worst", - "worstvideo" and "worstaudio". By default, - youtube-dl will pick the best quality. + preference using slashes: -f 22/17/18 . -f + mp4 , -f m4a and -f flv are also + supported. You can also use the special + names "best", "bestvideo", "bestaudio", + "worst", "worstvideo" and "worstaudio". By + default, youtube-dl will pick the best + quality. Use commas to download multiple + audio formats, such as -f + 136/137/mp4/bestvideo,140/m4a/bestaudio --all-formats download all available video formats --prefer-free-formats prefer free video formats unless a specific one is requested diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 23892a8bd..430509ba3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.16.1' +__version__ = '2014.09.18' -- cgit v1.2.3 From 0529eef5a4513d8f3c042f09fe5485e1c41e2f08 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:54:03 +0200 Subject: [hypestat] Unify allmyvideos and vidspot (Closes #3788) --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/allmyvideos.py | 59 -------------------------------- youtube_dl/extractor/hypestat.py | 67 +++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 60 deletions(-) delete mode 100644 youtube_dl/extractor/allmyvideos.py create mode 100644 youtube_dl/extractor/hypestat.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 75831b40a..97693018f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,7 +6,6 @@ from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .anysex import AnySexIE from .aol import AolIE -from .allmyvideos import AllmyvideosIE from .allocine import AllocineIE from .aparat import AparatIE from .appletrailers import AppleTrailersIE @@ -151,6 +150,7 @@ from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .huffpost import HuffPostIE from .hypem import HypemIE +from .hypestat import HypestatIE from .iconosquare import IconosquareIE from .ign import IGNIE, OneUPIE from .imdb import ( diff --git a/youtube_dl/extractor/allmyvideos.py b/youtube_dl/extractor/allmyvideos.py deleted file mode 100644 index e6c60e7e4..000000000 --- a/youtube_dl/extractor/allmyvideos.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import os.path -import re - -from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse, - compat_urllib_request, -) - - -class AllmyvideosIE(InfoExtractor): - IE_NAME = 'allmyvideos.net' - _VALID_URL = r'https?://allmyvideos\.net/(?P<id>[a-zA-Z0-9_-]+)' - - _TEST = { - 'url': 'http://allmyvideos.net/jih3nce3x6wn', - 'md5': '710883dee1bfc370ecf9fa6a89307c88', - 'info_dict': { - 'id': 'jih3nce3x6wn', - 'ext': 'mp4', - 'title': 'youtube-dl test video', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - orig_webpage = self._download_webpage(url, video_id) - fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) - data = dict(fields) - - post = compat_urllib_parse.urlencode(data) - headers = { - b'Content-Type': b'application/x-www-form-urlencoded', - } - req = compat_urllib_request.Request(url, post, headers) - webpage = self._download_webpage( - req, video_id, note='Downloading video page ...') - - title = os.path.splitext(data['fname'])[0] - - #Could be several links with different quality - links = re.findall(r'"file" : "?(.+?)",', webpage) - # Assume the links are ordered in quality - formats = [{ - 'url': l, - 'quality': i, - } for i, l in enumerate(links)] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - } diff --git a/youtube_dl/extractor/hypestat.py b/youtube_dl/extractor/hypestat.py new file mode 100644 index 000000000..8b8db30ae --- /dev/null +++ b/youtube_dl/extractor/hypestat.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os.path +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urllib_request, +) + + +class HypestatIE(InfoExtractor): + IE_DESC = 'allmyvideos.net and vidspot.net' + _VALID_URL = r'https?://(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)' + + _TESTS = [{ + 'url': 'http://allmyvideos.net/jih3nce3x6wn', + 'md5': '710883dee1bfc370ecf9fa6a89307c88', + 'info_dict': { + 'id': 'jih3nce3x6wn', + 'ext': 'mp4', + 'title': 'youtube-dl test video', + }, + }, { + 'url': 'http://vidspot.net/l2ngsmhs8ci5', + 'md5': '710883dee1bfc370ecf9fa6a89307c88', + 'info_dict': { + 'id': 'l2ngsmhs8ci5', + 'ext': 'mp4', + 'title': 'youtube-dl test video', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + orig_webpage = self._download_webpage(url, video_id) + fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) + data = dict(fields) + + post = compat_urllib_parse.urlencode(data) + headers = { + b'Content-Type': b'application/x-www-form-urlencoded', + } + req = compat_urllib_request.Request(url, post, headers) + webpage = self._download_webpage( + req, video_id, note='Downloading video page ...') + + title = os.path.splitext(data['fname'])[0] + + #Could be several links with different quality + links = re.findall(r'"file" : "?(.+?)",', webpage) + # Assume the links are ordered in quality + formats = [{ + 'url': l, + 'quality': i, + } for i, l in enumerate(links)] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } -- cgit v1.2.3 From 37bfe8ace4dcd1b476a54aedb7f39b88e7bb527e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:56:02 +0200 Subject: [hypestat] Match URLs with www. and https:// --- youtube_dl/extractor/hypestat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hypestat.py b/youtube_dl/extractor/hypestat.py index 8b8db30ae..e1a142268 100644 --- a/youtube_dl/extractor/hypestat.py +++ b/youtube_dl/extractor/hypestat.py @@ -13,7 +13,7 @@ from ..utils import ( class HypestatIE(InfoExtractor): IE_DESC = 'allmyvideos.net and vidspot.net' - _VALID_URL = r'https?://(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)' + _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)' _TESTS = [{ 'url': 'http://allmyvideos.net/jih3nce3x6wn', @@ -31,6 +31,9 @@ class HypestatIE(InfoExtractor): 'ext': 'mp4', 'title': 'youtube-dl test video', }, + }, { + 'url': 'https://www.vidspot.net/l2ngsmhs8ci5', + 'only_matching': True, }] def _real_extract(self, url): -- cgit v1.2.3 From 46f74bcf5c5fc876e3a966408cb8bde6d6ef15e0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:57:04 +0200 Subject: [soundcloud] Fix non-secret playlists --- youtube_dl/extractor/soundcloud.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2bed3c350..4719ba45c 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -338,20 +338,17 @@ class SoundcloudUserIE(SoundcloudIE): class SoundcloudPlaylistIE(SoundcloudIE): - _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))$' + _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' - _TESTS = [ - - { - 'url': 'http://api.soundcloud.com/playlists/4110309', - 'info_dict': { - 'id': '4110309', - 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', - 'description': 're:.*?TILT Brass - Bowery Poetry Club', - }, - 'playlist_count': 6, - } - ] + _TESTS = [{ + 'url': 'http://api.soundcloud.com/playlists/4110309', + 'info_dict': { + 'id': '4110309', + 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', + 'description': 're:.*?TILT Brass - Bowery Poetry Club', + }, + 'playlist_count': 6, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) -- cgit v1.2.3 From 589d3d7c7ae18875060caa15f5547c0194932e55 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 21:37:09 +0200 Subject: [moniker] rename from hypestat (#3788) --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/hypestat.py | 70 ---------------------------------------- youtube_dl/extractor/moniker.py | 70 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 71 deletions(-) delete mode 100644 youtube_dl/extractor/hypestat.py create mode 100644 youtube_dl/extractor/moniker.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 97693018f..a9a33c40f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -150,7 +150,6 @@ from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .huffpost import HuffPostIE from .hypem import HypemIE -from .hypestat import HypestatIE from .iconosquare import IconosquareIE from .ign import IGNIE, OneUPIE from .imdb import ( @@ -209,6 +208,7 @@ from .mpora import MporaIE from .moevideo import MoeVideoIE from .mofosex import MofosexIE from .mojvideo import MojvideoIE +from .moniker import MonikerIE from .mooshare import MooshareIE from .morningstar import MorningstarIE from .motherless import MotherlessIE diff --git a/youtube_dl/extractor/hypestat.py b/youtube_dl/extractor/hypestat.py deleted file mode 100644 index e1a142268..000000000 --- a/youtube_dl/extractor/hypestat.py +++ /dev/null @@ -1,70 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import os.path -import re - -from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse, - compat_urllib_request, -) - - -class HypestatIE(InfoExtractor): - IE_DESC = 'allmyvideos.net and vidspot.net' - _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)' - - _TESTS = [{ - 'url': 'http://allmyvideos.net/jih3nce3x6wn', - 'md5': '710883dee1bfc370ecf9fa6a89307c88', - 'info_dict': { - 'id': 'jih3nce3x6wn', - 'ext': 'mp4', - 'title': 'youtube-dl test video', - }, - }, { - 'url': 'http://vidspot.net/l2ngsmhs8ci5', - 'md5': '710883dee1bfc370ecf9fa6a89307c88', - 'info_dict': { - 'id': 'l2ngsmhs8ci5', - 'ext': 'mp4', - 'title': 'youtube-dl test video', - }, - }, { - 'url': 'https://www.vidspot.net/l2ngsmhs8ci5', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - orig_webpage = self._download_webpage(url, video_id) - fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) - data = dict(fields) - - post = compat_urllib_parse.urlencode(data) - headers = { - b'Content-Type': b'application/x-www-form-urlencoded', - } - req = compat_urllib_request.Request(url, post, headers) - webpage = self._download_webpage( - req, video_id, note='Downloading video page ...') - - title = os.path.splitext(data['fname'])[0] - - #Could be several links with different quality - links = re.findall(r'"file" : "?(.+?)",', webpage) - # Assume the links are ordered in quality - formats = [{ - 'url': l, - 'quality': i, - } for i, l in enumerate(links)] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - } diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py new file mode 100644 index 000000000..79bb2ca59 --- /dev/null +++ b/youtube_dl/extractor/moniker.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os.path +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urllib_request, +) + + +class MonikerIE(InfoExtractor): + IE_DESC = 'allmyvideos.net and vidspot.net' + _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)' + + _TESTS = [{ + 'url': 'http://allmyvideos.net/jih3nce3x6wn', + 'md5': '710883dee1bfc370ecf9fa6a89307c88', + 'info_dict': { + 'id': 'jih3nce3x6wn', + 'ext': 'mp4', + 'title': 'youtube-dl test video', + }, + }, { + 'url': 'http://vidspot.net/l2ngsmhs8ci5', + 'md5': '710883dee1bfc370ecf9fa6a89307c88', + 'info_dict': { + 'id': 'l2ngsmhs8ci5', + 'ext': 'mp4', + 'title': 'youtube-dl test video', + }, + }, { + 'url': 'https://www.vidspot.net/l2ngsmhs8ci5', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + orig_webpage = self._download_webpage(url, video_id) + fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) + data = dict(fields) + + post = compat_urllib_parse.urlencode(data) + headers = { + b'Content-Type': b'application/x-www-form-urlencoded', + } + req = compat_urllib_request.Request(url, post, headers) + webpage = self._download_webpage( + req, video_id, note='Downloading video page ...') + + title = os.path.splitext(data['fname'])[0] + + #Could be several links with different quality + links = re.findall(r'"file" : "?(.+?)",', webpage) + # Assume the links are ordered in quality + formats = [{ + 'url': l, + 'quality': i, + } for i, l in enumerate(links)] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } -- cgit v1.2.3 From 7267bd536fb81cb1bdcc6554219a0b66a75b31a6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 19 Sep 2014 09:57:53 +0200 Subject: [muenchentv] Add support (Fixes #3507) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/common.py | 2 + youtube_dl/extractor/muenchentv.py | 77 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/muenchentv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a9a33c40f..625666acb 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -221,6 +221,7 @@ from .mtv import ( MTVServicesEmbeddedIE, MTVIggyIE, ) +from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE from .musicvault import MusicVaultIE from .muzu import MuzuTVIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 929dd1e97..9c30a1d33 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -130,6 +130,8 @@ class InfoExtractor(object): by YoutubeDL if it's missing) categories: A list of categories that the video falls in, for example ["Sports", "Berlin"] + is_live: True, False, or None (=unknown). Whether this video is a + live stream that goes on instead of a fixed-length video. Unless mentioned otherwise, the fields should be Unicode strings. diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dl/extractor/muenchentv.py new file mode 100644 index 000000000..3a938861b --- /dev/null +++ b/youtube_dl/extractor/muenchentv.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import datetime +import json + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + js_to_json, +) + + +class MuenchenTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream' + IE_DESC = 'münchen.tv' + _TEST = { + 'url': 'http://www.muenchen.tv/livestream/', + 'info_dict': { + 'id': '5334', + 'display_id': 'live', + 'ext': 'mp4', + 'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + display_id = 'live' + webpage = self._download_webpage(url, display_id) + + now = datetime.datetime.now() + now_str = now.strftime("%Y-%m-%d %H:%M") + title = self._og_search_title(webpage) + ' ' + now_str + + data_js = self._search_regex( + r'(?s)\nplaylist:\s*(\[.*?}\]),related:', + webpage, 'playlist configuration') + data_json = js_to_json(data_js) + data = json.loads(data_json)[0] + + video_id = data['mediaid'] + thumbnail = data.get('image') + + formats = [] + for format_num, s in enumerate(data['sources']): + ext = determine_ext(s['file'], None) + label_str = s.get('label') + if label_str is None: + label_str = '_%d' % format_num + + if ext is None: + format_id = label_str + else: + format_id = '%s-%s' % (ext, label_str) + + formats.append({ + 'url': s['file'], + 'tbr': int_or_none(s.get('label')), + 'ext': 'mp4', + 'format_id': format_id, + 'preference': -100 if '.smil' in s['file'] else 0, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'is_live': True, + } + -- cgit v1.2.3 From f566d9f1d54a61497a17c5ed62a32ee1387483bd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 19 Sep 2014 09:58:01 +0200 Subject: release 2014.09.19 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 430509ba3..940e9c8cf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.18' +__version__ = '2014.09.19' -- cgit v1.2.3 From 532f5bff70cc32f54f38fbce9233a88faf4423b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 19 Sep 2014 20:58:50 +0700 Subject: [franceinter] Fix extraction and modernize --- youtube_dl/extractor/franceinter.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index deb1b0b9d..6613ee17a 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -4,16 +4,21 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import int_or_none class FranceInterIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})' + _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', - 'file': '793962.mp3', 'md5': '4764932e466e6f6c79c317d2e74f6884', "info_dict": { - "title": "L’Histoire dans les jeux vidéo", + 'id': '793962', + 'ext': 'mp3', + 'title': 'L’Histoire dans les jeux vidéo', + 'description': 'md5:7e93ddb4451e7530022792240a3049c7', + 'timestamp': 1387369800, + 'upload_date': '20131218', }, } @@ -22,17 +27,26 @@ class FranceInterIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title') + path = self._search_regex( - r'&urlAOD=(.*?)&startTime', webpage, 'video url') + r'<a id="player".+?href="([^"]+)"', webpage, 'video url') video_url = 'http://www.franceinter.fr/' + path + title = self._html_search_regex( + r'<span class="title">(.+?)</span>', webpage, 'title') + description = self._html_search_regex( + r'<span class="description">(.*?)</span>', + webpage, 'description', fatal=False) + timestamp = int_or_none(self._search_regex( + r'data-date="(\d+)"', webpage, 'upload date', fatal=False)) + return { 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, 'formats': [{ 'url': video_url, 'vcodec': 'none', }], - 'title': title, } -- cgit v1.2.3 From 3e8fcd9fa1ae23ee3f0370dd948411a5f74c03dc Mon Sep 17 00:00:00 2001 From: Marco Schuster <marco+github@m-s-d.eu> Date: Sat, 20 Sep 2014 02:32:41 +0200 Subject: [divxstage] added .to TLD Example video "http://www.divxstage.eu/video/930c52709d2" which gets redirected to .to TLD --- youtube_dl/extractor/divxstage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/divxstage.py b/youtube_dl/extractor/divxstage.py index 4ca3f37a2..b88379e06 100644 --- a/youtube_dl/extractor/divxstage.py +++ b/youtube_dl/extractor/divxstage.py @@ -7,7 +7,7 @@ class DivxStageIE(NovaMovIE): IE_NAME = 'divxstage' IE_DESC = 'DivxStage' - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'} + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'} _HOST = 'www.divxstage.eu' @@ -24,4 +24,4 @@ class DivxStageIE(NovaMovIE): 'title': 'youtubedl test video', 'description': 'This is a test video for youtubedl.', } - } \ No newline at end of file + } -- cgit v1.2.3