Revision of ytPlaylistToPodcast.py

Steven Smith revised this gist 1469631334. Go to revision

1 file changed, 1 insertion, 1 deletion

ytPlaylistToPodcast.py

			@@ -10,7 +10,7 @@ DTFORMAT = "%a, %b %d %Y %H:%M:%S +0000" # Do not modify, needs to be at top
10	10		playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
11	11		number_to_get = 30
12	12		# generate your key via google's api dashboard. needs to have access to youtube's data api v3
13		-	apikey = "AIzaSyAuwEEcpOGG230tm7na1KdO0tHFm2S_dIE"
	13	+	apikey = ""
14	14		webroot = "http://domain.bla"
15	15		webpath = "/var/www"
16	16		outpdir = "/podcastdir"

Steven Smith revised this gist 1430723838. Go to revision

1 file changed, 2 insertions, 2 deletions

ytPlaylistToPodcast.py

			@@ -92,7 +92,7 @@ def download_file(item):
92	92		if not os.path.isfile(fn):
93	93		print " Downloading"
94	94		url = [i for i in item["formats"] if i["format_id"] == "nondash-140"][0]["url"]
95		-	print "".join(subprocess.check_output(["wget", url, "-O", fn]).splitlines()[-5])
	95	+	print "".join(subprocess.check_output(["/usr/bin/wget", url, "-O", fn, "-nv"]).splitlines())
96	96		return fn
97	97		else:
98	98		print " File exists"
			@@ -120,7 +120,7 @@ def get_output(items):
120	120
121	121		if __name__ == "__main__":
122	122		print "Getting playlist data from youtube, this can take a while if the playlist is large..."
123		-	data = subprocess.check_output(['youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
	123	+	data = subprocess.check_output(['/usr/local/bin/youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
124	124		print "Playlist data obtained, starting processing..."
125	125		with open(webpath + xmlfn, "w") as f:
126	126		f.write(get_output(json.loads(u"[" + u",".join(data) + u"]")).encode('ascii', 'ignore'))

Steven Smith revised this gist 1430581002. Go to revision

1 file changed, 2 insertions, 1 deletion

ytPlaylistToPodcast.py

			@@ -8,6 +8,7 @@ from urllib import urlretrieve
8	8		DTFORMAT = "%a, %b %d %Y %H:%M:%S +0000" # Do not modify, needs to be at top
9	9
10	10		playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
	11	+	number_to_get = 30
11	12		# generate your key via google's api dashboard. needs to have access to youtube's data api v3
12	13		apikey = "AIzaSyAuwEEcpOGG230tm7na1KdO0tHFm2S_dIE"
13	14		webroot = "http://domain.bla"
			@@ -119,7 +120,7 @@ def get_output(items):
119	120
120	121		if __name__ == "__main__":
121	122		print "Getting playlist data from youtube, this can take a while if the playlist is large..."
122		-	data = subprocess.check_output(['youtube-dl', playlisturl, '--playlist-end', '30', '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
	123	+	data = subprocess.check_output(['youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
123	124		print "Playlist data obtained, starting processing..."
124	125		with open(webpath + xmlfn, "w") as f:
125	126		f.write(get_output(json.loads(u"[" + u",".join(data) + u"]")).encode('ascii', 'ignore'))

Steven Smith revised this gist 1430578749. Go to revision

1 file changed, 125 insertions

ytPlaylistToPodcast.py(file created)

		@@ -0,0 +1,125 @@
1	+	#!/usr/bin/env python2
2	+	# This file is released as public domain by Steven Smith (blha303) in Apr 2015
3	+	# In areas where public domain isn't a thing, I release it under the MIT license.
4	+	# Although credit would be nice if you use this in something cool. And email me a link too pls.
5	+	import time,os,requests,json,subprocess
6	+	from urllib import urlretrieve
7	+
# ---------------------------------------------------------------------------
# Settings. Everything in this section is plain module-level configuration
# read by the functions further down.
# ---------------------------------------------------------------------------

# RSS 2.0 requires RFC 822 dates (day before month), e.g.
# "Sat, 07 Sep 2002 00:00:01 +0000". Must stay above `podcast`, which uses it.
DTFORMAT = "%a, %d %b %Y %H:%M:%S +0000"

# Playlist whose most recent uploads are turned into podcast episodes.
playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
# Generate your key via Google's API dashboard; it needs access to YouTube's
# Data API v3. The key is read from the environment so that it never ends up
# in version control: export YOUTUBE_API_KEY=... before running.
apikey = os.environ.get("YOUTUBE_API_KEY", "")
webroot = "http://domain.bla"   # public base URL that serves `webpath`
webpath = "/var/www"            # local directory served at `webroot`
outpdir = "/podcastdir"         # sub-path (below both of the above) for output
outpfn = "/{id}.m4a"            # per-episode file name, formatted with the item
xmlfn = outpdir + "/podcast.xml"

# Channel-level values substituted into the BASE / BASE2 templates.
podcast = dict(
    self=webroot + xmlfn,  # should point to xml
    title="A cool podcast",
    link="http://blha303.com.au",
    description="DAE podcast?",
    copyright="Copyright 2015 Youtube",
    # DTFORMAT hard-codes "+0000", so the timestamp has to be taken in UTC
    # rather than in the machine's local time zone.
    now=time.strftime(DTFORMAT, time.gmtime()),
    language="en-us",
    subtitle="Youtube is pretty cool, ey",
    author="Me",
    summary="Wip wap wop",
    owner_name="Me",
    owner_email="me@you.us",
    image=webroot + outpdir + "/podcast.png",
    category="yay",
    explicit="yes",  # or no
)

# Values merged into every episode before it is rendered with ITEM.
item_info = dict(
    author="Me",
    summary="Just more info",
    category="Blablabla",
    keywords="autogen",
)
43	+
# Feed header: the XML prolog and all channel metadata that precedes the
# episode list. Rendered with the channel settings as keyword arguments.
BASE = u"""<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<atom:link href="{self}" rel="self" type="application/rss+xml" />
<title>{title}</title>
<link>{link}</link>
<description>{description}</description>
<lastBuildDate>{now}</lastBuildDate>
<language>{language}</language>
<copyright>{copyright}</copyright>
<itunes:subtitle>{subtitle}</itunes:subtitle>
<itunes:author>{author}</itunes:author>
<itunes:summary>{summary}</itunes:summary>
<itunes:owner>
<itunes:name>{owner_name}</itunes:name>
<itunes:email>{owner_email}</itunes:email>
</itunes:owner>
<itunes:image href="{image}" />
<itunes:category text="{category}" />
"""
# Feed footer: emitted after the last episode; closes <channel> and <rss>.
BASE2 = u"""<itunes:explicit>{explicit}</itunes:explicit>
</channel>
</rss>
"""
# One episode. The enclosure points at an .m4a audio file, so it is declared
# as audio (the MIME type podcast clients expect for M4A), not as video.
ITEM = u"""<item>
<title>{fulltitle}</title>
<link>https://www.youtube.com/watch?v={id}</link>
<itunes:author>{author}</itunes:author>
<description>{description}</description>
<itunes:summary>{summary}</itunes:summary>
<enclosure url="{lurl}" length="{size}" type="audio/x-m4a"/>
<guid>{lurl}</guid>
<pubDate>{upload_date}</pubDate>
<itunes:order>{order}</itunes:order>
<itunes:duration>{duration}</itunes:duration>
<itunes:keywords>{keywords}</itunes:keywords>
<category>{category}</category>
<itunes:explicit>{explicit}</itunes:explicit>
</item>
"""
84	+
def get_time(id):
    """Return the publication time of a YouTube video, formatted with DTFORMAT.

    Looks the video up through the YouTube Data API v3 ``videos`` endpoint,
    authenticating with the module-level ``apikey``.

    Args:
        id: The YouTube video id to look up.

    Returns:
        The video's ``snippet.publishedAt`` value re-formatted with DTFORMAT.

    Raises:
        requests.HTTPError: The API answered with an error status (for
            example because ``apikey`` is missing or invalid).
        IndexError: The API returned no entry for ``id``.
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos",
        params={'id': id, 'part': "snippet,statistics,recordingDetails", "key": apikey},
        timeout=30,  # do not hang the whole feed build on one stalled request
    )
    # Fail with the real HTTP error instead of a KeyError on the error payload.
    response.raise_for_status()
    entries = response.json().get("items") or []
    if not entries:
        raise IndexError("YouTube API returned no data for video %r" % (id,))
    published = entries[0]["snippet"]["publishedAt"]
    # Accept timestamps with or without a fractional-seconds part; the previous
    # pattern only matched a literal ".000Z" suffix and raised ValueError on
    # anything else.
    stamp = published.split(".")[0].rstrip("Z")
    return time.strftime(DTFORMAT, time.strptime(stamp, "%Y-%m-%dT%H:%M:%S"))
88	+
def download_file(item):
    """Make sure the audio file for ``item`` exists locally and return its path.

    The target path is ``webpath + outpdir + outpfn`` formatted with the item.
    If that file is already present nothing is downloaded; otherwise the URL
    of the item's ``nondash-140`` format is fetched with ``wget``.

    Args:
        item: One video entry as produced by ``youtube-dl -j``; must provide
            the keys used by ``outpfn`` and a ``formats`` list.

    Returns:
        The local path of the audio file.

    Raises:
        IndexError: ``item`` has no ``nondash-140`` format.
        subprocess.CalledProcessError: ``wget`` exited with a non-zero status.
    """
    fn = webpath + outpdir + outpfn.format(**item)
    if os.path.isfile(fn):
        print(" File exists")
        return fn

    print(" Downloading")
    url = next(
        (fmt["url"] for fmt in item["formats"] if fmt["format_id"] == "nondash-140"),
        None,
    )
    if url is None:
        raise IndexError("no 'nondash-140' format available for %s" % item.get("id"))

    # Download under a temporary name and rename on success, so an interrupted
    # transfer is never mistaken for a finished file by the isfile() check above.
    partial = fn + ".part"
    try:
        # wget logs to stderr, so there is no stdout worth capturing (indexing
        # the captured output used to raise IndexError). Let wget report
        # straight to the terminal, in its terse -nv form.
        subprocess.check_call(["wget", "-nv", url, "-O", partial])
        os.rename(partial, fn)
    finally:
        # Only still present when wget or the rename failed.
        if os.path.isfile(partial):
            os.remove(partial)
    return fn
99	+
def get_output(items):
    """Render the complete podcast feed for ``items`` and return it as text.

    Each item is updated in place: the ``item_info`` defaults are merged in,
    the audio is downloaded via ``download_file``, the publish date is looked
    up via ``get_time``, and the fields needed by the ITEM template are filled.

    Args:
        items: Video entries as produced by ``youtube-dl -j``. Each needs at
            least ``id``, ``fulltitle``, ``description``, ``duration`` (in
            seconds), ``upload_date`` and ``formats``.

    Returns:
        The feed: BASE, one ITEM per video (newest upload first), then BASE2.
    """
    # Local import so this function does not depend on the file's import block.
    from xml.sax.saxutils import escape

    rendered = []
    items = sorted(items, key=lambda k: k["upload_date"], reverse=True)
    for x, item in enumerate(items):
        item.update(item_info)
        print("Processing {fulltitle} ({id})".format(**item))
        m, s = divmod(item["duration"], 60)
        h, m = divmod(m, 60)
        item["order"] = x + 1
        item["duration"] = u"%d:%02d:%02d" % (h, m, s)
        item["upload_date"] = get_time(item["id"])
        item["size"] = str(os.path.getsize(download_file(item)))
        item["explicit"] = podcast["explicit"]
        item["lurl"] = webroot + outpdir + outpfn.format(**item)
        # Titles and descriptions are free text: "&", "<" and ">" must be
        # escaped or the feed is not well-formed XML. A missing description
        # is treated as empty.
        item["fulltitle"] = escape(item["fulltitle"])
        item["description"] = escape((item.get("description") or u"").replace(u"\u25ba", u">"))
        rendered.append(ITEM.format(**item))
        print(" Processed")
    print("Process complete")
    # The templates use named fields, so the settings have to be passed as
    # keyword arguments; passing the dict positionally raises KeyError.
    return BASE.format(**podcast) + "".join(rendered) + BASE2.format(**podcast)
119	+
# Script entry point: fetch the playlist metadata, build the feed, write it.
if __name__ == "__main__":
    print("Getting playlist data from youtube, this can take a while if the playlist is large...")
    # -j prints one JSON document per video; only the 30 newest entries are
    # considered, and videos of five minutes or less are filtered out.
    data = subprocess.check_output(['youtube-dl', playlisturl, '--playlist-end', '30', '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
    print("Playlist data obtained, starting processing...")
    # Decode and parse line by line; this works whether the subprocess output
    # is a byte string or text and ignores blank lines.
    entries = [json.loads(line.decode("utf-8")) for line in data if line.strip()]
    # Build the whole feed before touching the output file, so a failure while
    # processing does not leave a truncated podcast.xml behind.
    feed = get_output(entries)
    # The feed declares encoding="utf-8"; write it as such instead of silently
    # dropping every non-ASCII character.
    with open(webpath + xmlfn, "wb") as f:
        f.write(feed.encode("utf-8"))

Newer Older