boydl 3.73 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
#!/usr/bin/python
# encoding: utf-8

"""
boydl – Best Of Youtube DownLoader

This script fetches the Best Of YouTube RSS feed and downloads new videos.
Via an index every video is downloaded only once.

This is a complete Python rewrite from the former Bash implementation due
to RSS feed migrations.

Usage: boydl [ --help | -h | --version ]

Options:
    --help            Display this boydl's help and exit.
    --version         Display boydl's version and exit.
"""

# Release identification: the same version as display string and as tuple.
__version_info__ = (2, 1, 1)
__version__ = "2.1.1"

# Authorship and licensing metadata.
__author__ = "Lysander Trischler"
__maintainer__ = "Lysander Trischler"
__email__ = "software@lyse.isobeef.org"
__copyright__ = "Copyright 2011, Lysander Trischler"
__license__ = "WTFPL"
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

import os.path
import sys

# File name of this script; used as prefix in all messages.
me = os.path.basename(__file__)

#
# Minimal command line handling. boydl takes no positional arguments, so
# the first argument already decides the outcome: help or version text is
# printed and the script exits with 0, anything else is rejected with
# exit code 2.
#
# sys.exit() is used instead of the exit() helper because the latter is
# only injected by the site module for interactive use and is missing
# when the interpreter is started without it.
#
for arg in sys.argv[1:]:
	if arg in ("--help", "-h"):
		print(__doc__.strip())
		sys.exit(0)
	elif arg == "--version":
		print("%s Version %s" % (me, __version__))
		sys.exit(0)
	else:
		sys.stderr.write("%s: Unknown argument `%s'!\n" % (me, arg))
		sys.exit(2)

import xml.dom.minidom
import urllib
import re
import os

# Feed listing the videos; anything not starting with http:// or https://
# is opened as a local file instead.
rss_url    = 'http://feeds.feedburner.com/bestofyoutubedotcom?format=xml'

# Download command line; the video URL is appended as its last argument.
ydl        = 'youtube-dl --restrict-filename -wo "' + os.path.expanduser("~") + '/Desktop/%(title)s-%(id)s.%(ext)s" -f best'

# Captures the video ID from a quoted thumbnail URL in an item description.
# The ID may not contain '/' or '"', so the match can neither run across
# several URLs on one line nor pick up arbitrary markup, and the dots are
# escaped so they only match literal dots.
link_regex = re.compile(r'"https?://img\.youtube\.com/vi/([^/"]+)/[0-9]+\.jpg"')

# Index of already handled video URLs and log of failed downloads; both
# are kept in the directory of the script itself.
index_file = os.path.join(os.path.dirname(__file__), "boydl.index")
error_file = os.path.join(os.path.dirname(__file__), "boydl.error")

#
# Fetch the raw feed. An rss_url without http(s) scheme is opened as a
# local file. The stream is closed in a finally clause so that a failing
# read cannot leak it; try/finally is used instead of a with statement
# because Python 2's urllib.urlopen() result is not a context manager.
#
if rss_url.startswith(("http://", "https://")):
	rss_stream = urllib.urlopen(rss_url)
else:
	rss_stream = open(rss_url, "rb")
try:
	rss_data = rss_stream.read()
finally:
	rss_stream.close()

#
# Patch a few literal sequences in the feed text before handing it to
# the XML parser.
# NOTE(review): search and replacement text of the first replace() look
# identical here; presumably they differ in encoding or normalisation in
# the original file -- confirm.
#
tree = xml.dom.minidom.parseString(rss_data
                                  .replace("é", "é")
                                  .replace("<br>", "&lt;br&gt;")
                                  .replace("&ouml;", "ö")
                                  .replace("< here >", "here"))

# The items to download live below the first <channel> of the first <rss>.
channel = tree.getElementsByTagName("rss")[0].getElementsByTagName("channel")[0]

# Only the download loop needs these two modules.
import shlex
import subprocess


def _first_text(node, tag):
	"""Return the data of the first child of node's first <tag> element.

	None is returned when node has no such element, when the element is
	empty, or when its first child carries no character data.
	"""
	elements = node.getElementsByTagName(tag)
	if not elements or elements[0].firstChild is None:
		return None
	return getattr(elements[0].firstChild, "data", None)


for item in channel.getElementsByTagName("item"):

	#
	# first try to fetch the URL from the description, to bypass feed proxy
	#
	url = None
	m = link_regex.search(_first_text(item, "description") or "")
	if m:
		url = 'http://www.youtube.com/v/%s' % m.group(1)

	#
	# if description link cannot be parsed use feed proxy's URL
	#
	if url is None:
		url = _first_text(item, "link")
		if url is None:
			# neither source yields a URL: report the item and move on
			sys.stderr.write("%s: Cannot extract YouTube video URL!\n" % me)
			continue
		print("%s: Cannot extract direct link to Best of YouTube website for '%s'."
		      " Using feed proxy's URL: %s" % (
				me, _first_text(item, "title"), url))

	#
	# skip already downloaded videos
	# Always read the index newly so parallel running boydls may
	# not download the same video multiple times. Please note this
	# implementation is not 100% safe: It is possible two or more
	# boydl instances execute this code at the very same time and
	# therefore fetch the video twice or more often. Because this
	# scenario is considered to happen only very rarely we don't
	# care about it here. Most of the time this holds.
	#
	# A missing index file (e.g. on the very first run) simply means
	# nothing has been downloaded yet.
	#
	already_indexed = False
	if os.path.isfile(index_file):
		with open(index_file, "rb") as index_stream:
			for line in index_stream:
				if line.strip() == url:
					already_indexed = True
					break
	if already_indexed:
		continue

	#
	# download video
	# The URL is recorded in the index before the download starts, so a
	# failed download is not retried automatically; it is written to the
	# error file instead.
	#
	with open(index_file, "ab") as index_stream:
		index_stream.write("%s\n" % url)

	# cmd is only used for messages; the command itself is run without a
	# shell because the URL originates from the remote feed and must not
	# be interpreted as shell syntax.
	cmd = ydl + ' ' + url
	print('—' * 80)
	try:
		exit_code = subprocess.call(shlex.split(ydl) + [url])
	except OSError:
		# downloader could not be started at all; 127 is the code a
		# shell reports for "command not found"
		exit_code = 127
	if exit_code != 0:
		sys.stderr.write("'%s' failed with exit code %d\n" % (cmd, exit_code))
		with open(error_file, "ab") as error_stream:
			error_stream.write("%s\t%s\t%d\n" % (url, cmd, exit_code))