#!/usr/bin/python
# encoding: utf-8
"""
boydl – Best Of Youtube DownLoader

This script fetches the Best Of YouTube RSS feed and downloads new videos.
Via an index every video is downloaded only once. This is a complete Python
rewrite from the former Bash implementation due to RSS feed migrations.

Usage: boydl [ --help | -h | --version ]

Options:
    --help      Display this boydl's help and exit.
    --version   Display boydl's version and exit.
"""

__author__ = "Lysander Trischler"
__copyright__ = "Copyright 2011, Lysander Trischler"
__license__ = "WTFPL"
__maintainer__ = "Lysander Trischler"
__email__ = "software@lyse.isobeef.org"
__version__ = "2.1.2"
__version_info__ = (2, 1, 2)

import os.path
import sys

me = os.path.basename(__file__)

# Parse the command line before the heavier imports below so --help and
# --version respond instantly. Any unknown argument aborts with status 2.
for arg in sys.argv[1:]:
    if arg in ("--help", "-h"):
        print(__doc__.strip())
        exit(0)
    elif arg == "--version":
        print("%s Version %s" % (me, __version__))
        exit(0)
    else:
        sys.stderr.write("%s: Unknown argument `%s'!\n" % (me, arg))
        exit(2)

import xml.dom.minidom
import urllib
import re
import os

rss_url = 'http://feeds.feedburner.com/bestofyoutubedotcom?format=xml'

# youtube-dl invocation template: safe file names, no overwrite (-w),
# output to the user's Desktop, best available format.
ydl = ('youtube-dl --restrict-filename -wo "'
       + os.path.expanduser("~")
       + '/Desktop/%(title)s-%(id)s.%(ext)s" -f best')

# Extracts the YouTube video id from the thumbnail URL embedded in each
# item's description. Dots are escaped so the pattern matches only the
# literal host/extension (the former unescaped dots matched any character).
link_regex = re.compile(r'"http://img\.youtube\.com/vi/(.+)/[0-9]+\.jpg"')

# Both state files live next to this script: the index remembers every URL
# ever handed to youtube-dl; the error log records failed downloads.
index_file = os.path.join(os.path.dirname(__file__), "boydl.index")
error_file = os.path.join(os.path.dirname(__file__), "boydl.error")

# rss_url may also point at a local file (handy for testing).
if rss_url.startswith(("http://", "https://")):
    rss_stream = urllib.urlopen(rss_url)
else:
    rss_stream = open(rss_url, "rb")

# Repair the feed's known quirks before XML parsing: mojibake (UTF-8 text
# that was decoded as Latin-1) and raw newlines inside descriptions.
tree = xml.dom.minidom.parseString(
    rss_stream.read()
    .replace("é", "é")
    .replace("\n", "<br>")
    .replace("ö", "ö")
    .replace("< here >", "here"))
rss_stream.close()

channel = tree.getElementsByTagName("rss")[0].getElementsByTagName("channel")[0]

# Make sure the index exists so the per-item read below does not crash with
# an IOError on the very first run.
if not os.path.exists(index_file):
    open(index_file, "ab").close()

for item in channel.getElementsByTagName("item"):

    #
    # first try to fetch the URL from the description, to bypass feed proxy
    #
    url = None
    m = link_regex.search(
        item.getElementsByTagName("description")[0].firstChild.data)
    if m:
        url = 'http://www.youtube.com/v/%s' % m.group(1)

    #
    # if description link cannot be parsed use feed proxy's URL
    #
    if url is None:
        # Message and URL go on one line; the explicit write (instead of a
        # trailing-comma print) keeps the output identical across runs.
        sys.stdout.write(
            "%s: Cannot extract direct link to Best of YouTube website"
            " for '%s'. Using feed proxy's URL: " % (
                me, item.getElementsByTagName('title')[0].firstChild.data))
        url = item.getElementsByTagName("link")[0].firstChild.data
    print(url)

    #
    # skip already downloaded videos
    #
    # Always read the index newly so parallel running boydls may not
    # download the same video multiple times. Please note this
    # implementation is not 100% safe: two or more boydl instances may
    # execute this code at the very same time and therefore fetch the
    # video twice or more often. Because this scenario is considered to
    # happen only very rarely we don't care about it here.
    #
    with open(index_file, "rb") as index_stream:
        already_indexed = any(line.strip() == url for line in index_stream)
    if already_indexed:
        continue

    #
    # download video
    #
    if url is None:
        sys.stderr.write("%s: Cannot extract YouTube video URL!\n" % me)
    else:
        # Record the URL *before* downloading so a concurrently running
        # boydl instance skips it (see the race-condition note above).
        with open(index_file, "ab") as index_stream:
            index_stream.write("%s\n" % url)

        # NOTE(review): url originates from the feed and is interpolated
        # into a shell command unquoted. The regex-extracted video ids are
        # shell-safe in practice, but feed-proxy URLs are not guaranteed to
        # be — consider pipes.quote() here.
        cmd = ydl + ' ' + url
        print('—' * 80)

        status = os.system(cmd)
        # On POSIX os.system returns a raw wait status (exit code shifted
        # left by 8 bits), so decode it properly. The former "% 255" hack
        # mapped an exit code of 255 — youtube-dl's generic failure — to 0
        # and thereby silently dropped the error record.
        if os.name == "posix" and os.WIFEXITED(status):
            exit_code = os.WEXITSTATUS(status)
        else:
            exit_code = status

        if exit_code != 0:
            sys.stderr.write("'%s' failed with exit code %d\n"
                             % (cmd, exit_code))
            with open(error_file, "ab") as error_stream:
                error_stream.write("%s\t%s\t%d\n" % (url, cmd, exit_code))