ytPlaylistToPodcast.py

Revision 894b59d1d0d5b9188e3fc8b83da83ec4e4483cd4

ytPlaylistToPodcast.py · 5.0 KiB · Python Raw

#!/usr/bin/env python2 # This file is released as public domain by Steven Smith (blha303) in Apr 2015 # In areas where public domain isn't a thing, I release it under the MIT license. # Although credit would be nice if you use this in something cool. And email me a link too pls. import time,os,requests,json,subprocess from urllib import urlretrieve DTFORMAT = "%a, %b %d %Y %H:%M:%S +0000" # Do not modify, needs to be at top playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ" number_to_get = 30 # generate your key via google's api dashboard. needs to have access to youtube's data api v3 apikey = "AIzaSyAuwEEcpOGG230tm7na1KdO0tHFm2S_dIE" webroot = "http://domain.bla" webpath = "/var/www" outpdir = "/podcastdir" outpfn = "/{id}.m4a" xmlfn = outpdir + "/podcast.xml" podcast = dict( self = webroot + xmlfn, # should point to xml title = "A cool podcast", link = "http://blha303.com.au", description = "DAE podcast?", copyright = "Copyright 2015 Youtube", now = time.strftime(DTFORMAT), language = "en-us", subtitle = "Youtube is pretty cool, ey", author = "Me", summary = "Wip wap wop", owner_name = "Me", owner_email = "me@you.us", image = webroot + outpdir + "/podcast.png", category = "yay", explicit = "yes" # or no ) item_info = dict( author = "Me", summary = "Just more info", category = "Blablabla", keywords = "autogen" ) BASE = u"""<?xml version="1.0" encoding="utf-8"?> <rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0"> <channel> <atom:link href="{self}" rel="self" type="application/rss+xml" /> <title>{title}</title> <link>{link}</link> <description>{description}</description> <lastBuildDate>{now}</lastBuildDate> <language>{language}</language> <copyright>{copyright}</copyright> <itunes:subtitle>{subtitle}</itunes:subtitle> <itunes:author>{author}</itunes:author> <itunes:summary>{summary}</itunes:summary> <itunes:owner> <itunes:name>{owner_name}</itunes:name> 
<itunes:email>{owner_email}</itunes:email> </itunes:owner> <itunes:image href="{image}" /> <itunes:category text="{category}" /> """ BASE2 = u"""<itunes:explicit>{explicit}</itunes:explicit> </channel> </rss> """ ITEM = u"""<item> <title>{fulltitle}</title> <link>https://www.youtube.com/watch?v={id}</link> <itunes:author>{author}</itunes:author> <description>{description}</description> <itunes:summary>{summary}</itunes:summary> <enclosure url="{lurl}" length="{size}" type="video/mp4"/> <guid>{lurl}</guid> <pubDate>{upload_date}</pubDate> <itunes:order>{order}</itunes:order> <itunes:duration>{duration}</itunes:duration> <itunes:keywords>{keywords}</itunes:keywords> <category>{category}</category> <itunes:explicit>{explicit}</itunes:explicit> </item> """ def get_time(id): data = requests.get("https://www.googleapis.com/youtube/v3/videos", params={'id': id, 'part': "snippet,statistics,recordingDetails", "key": apikey}).json() return time.strftime(DTFORMAT, time.strptime(data["items"][0]["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%S.000Z")) def download_file(item): fn = webpath + outpdir + outpfn.format(**item) if not os.path.isfile(fn): print " Downloading" url = [i for i in item["formats"] if i["format_id"] == "nondash-140"][0]["url"] print "".join(subprocess.check_output(["/usr/bin/wget", url, "-O", fn, "-nv"]).splitlines()) return fn else: print " File exists" return fn def get_output(items): outp = [] items = sorted(items, key=lambda k: k["upload_date"], reverse=True) for x,item in enumerate(items): item.update(item_info) print "Processing {fulltitle} ({id})".format(**item) m,s = divmod(item["duration"], 60) h,m = divmod(m, 60) item["order"] = x+1 item["description"] = item["description"].replace(u"\u25ba", u">") item["duration"] = u"%d:%02d:%02d" % (h,m,s) item["upload_date"] = get_time(item["id"]) item["size"] = str(os.path.getsize(download_file(item))) item["explicit"] = podcast["explicit"] item["lurl"] = webroot + outpdir + outpfn.format(**item) 
outp.append(ITEM.format(**item)) print " Processed" print "Process complete" return BASE.format(**podcast) + "".join(outp) + BASE2.format(**podcast) if __name__ == "__main__": print "Getting playlist data from youtube, this can take a while if the playlist is large..." data = subprocess.check_output(['/usr/local/bin/youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines() print "Playlist data obtained, starting processing..." with open(webpath + xmlfn, "w") as f: f.write(get_output(json.loads(u"[" + u",".join(data) + u"]")).encode('ascii', 'ignore'))

1	#!/usr/bin/env python2
2	# This file is released as public domain by Steven Smith (blha303) in Apr 2015
3	# In areas where public domain isn't a thing, I release it under the MIT license.
4	# Although credit would be nice if you use this in something cool. And email me a link too pls.
5	import time,os,requests,json,subprocess
6	from urllib import urlretrieve
7
# Date layout used for every date written to the feed. RSS 2.0 requires
# RFC 822 dates, where the day of the month comes BEFORE the month name
# (e.g. "Sat, 04 Apr 2015 12:00:00 +0000"). The UTC offset is hard-coded,
# so every time value formatted with this string must be expressed in UTC.
# Needs to stay above `podcast`, which formats the build date with it.
DTFORMAT = "%a, %d %b %Y %H:%M:%S +0000"

# --- Source playlist -------------------------------------------------------
playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
number_to_get = 30  # passed to youtube-dl as --playlist-end

# Generate your key via Google's API dashboard. It needs access to
# YouTube's Data API v3.
# NOTE(review): this is a credential committed in source; consider loading
# it from the environment or a private config file instead.
apikey = "AIzaSyAuwEEcpOGG230tm7na1KdO0tHFm2S_dIE"

# --- Output locations ------------------------------------------------------
# Local files are written under webpath + outpdir; public URLs are built
# as webroot + outpdir + <file name>.
webroot = "http://domain.bla"
webpath = "/var/www"
outpdir = "/podcastdir"
outpfn = "/{id}.m4a"  # formatted with the video's info dict
xmlfn = outpdir + "/podcast.xml"

# Channel-level values substituted into the BASE / BASE2 templates.
podcast = dict(
    self = webroot + xmlfn,  # should point to xml
    title = "A cool podcast",
    link = "http://blha303.com.au",
    description = "DAE podcast?",
    copyright = "Copyright 2015 Youtube",
    # gmtime() because DTFORMAT labels the value as +0000; the default
    # (local time) would publish a wrong build date on non-UTC hosts.
    now = time.strftime(DTFORMAT, time.gmtime()),
    language = "en-us",
    subtitle = "Youtube is pretty cool, ey",
    author = "Me",
    summary = "Wip wap wop",
    owner_name = "Me",
    owner_email = "me@you.us",
    image = webroot + outpdir + "/podcast.png",
    category = "yay",
    explicit = "yes"  # or no
)

# Values merged into every item's info dict before it is rendered.
item_info = dict(
    author = "Me",
    summary = "Just more info",
    category = "Blablabla",
    keywords = "autogen"
)
44
# Feed header: XML declaration, <rss>/<channel> opening tags and the
# channel-level metadata. Filled in with the keys of the `podcast` dict.
BASE = u"""<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<atom:link href="{self}" rel="self" type="application/rss+xml" />
<title>{title}</title>
<link>{link}</link>
<description>{description}</description>
<lastBuildDate>{now}</lastBuildDate>
<language>{language}</language>
<copyright>{copyright}</copyright>
<itunes:subtitle>{subtitle}</itunes:subtitle>
<itunes:author>{author}</itunes:author>
<itunes:summary>{summary}</itunes:summary>
<itunes:owner>
<itunes:name>{owner_name}</itunes:name>
<itunes:email>{owner_email}</itunes:email>
</itunes:owner>
<itunes:image href="{image}" />
<itunes:category text="{category}" />
"""

# Feed footer: emitted after all items; closes <channel> and <rss>.
BASE2 = u"""<itunes:explicit>{explicit}</itunes:explicit>
</channel>
</rss>
"""

# One <item> per video. Filled in with the video's info dict.
# The enclosure points at an audio-only .m4a file, so it is advertised as
# audio/x-m4a; labelling it video/mp4 misreports the media type to clients.
ITEM = u"""<item>
<title>{fulltitle}</title>
<link>https://www.youtube.com/watch?v={id}</link>
<itunes:author>{author}</itunes:author>
<description>{description}</description>
<itunes:summary>{summary}</itunes:summary>
<enclosure url="{lurl}" length="{size}" type="audio/x-m4a"/>
<guid>{lurl}</guid>
<pubDate>{upload_date}</pubDate>
<itunes:order>{order}</itunes:order>
<itunes:duration>{duration}</itunes:duration>
<itunes:keywords>{keywords}</itunes:keywords>
<category>{category}</category>
<itunes:explicit>{explicit}</itunes:explicit>
</item>
"""
85
def get_time(id):
    """Return the publish time of a YouTube video as a feed date string.

    Queries the YouTube Data API v3 ``videos`` endpoint for ``id`` using the
    module-level ``apikey`` and re-formats ``snippet.publishedAt`` with the
    module-level ``DTFORMAT``.

    Args:
        id: the YouTube video id to look up.

    Returns:
        The publish timestamp formatted with ``DTFORMAT``.

    Raises:
        KeyError: the response has no ``items`` / ``snippet`` /
            ``publishedAt`` entry.
        IndexError: the API returned an empty ``items`` list.
        ValueError: the timestamp is not in the expected
            ``YYYY-MM-DDTHH:MM:SS`` form.
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos",
        params={'id': id, 'part': "snippet,statistics,recordingDetails", "key": apikey},
    )
    data = response.json()

    items = data["items"]
    if not items:
        raise IndexError("YouTube API returned no video data for id %r" % (id,))
    published = items[0]["snippet"]["publishedAt"]

    # Accept the timestamp with or without fractional seconds
    # ("...:05.000Z" as well as "...:05Z"): drop the trailing "Z" and
    # anything after the first "." before parsing.
    stamp = published.rstrip("Z").split(".")[0]
    return time.strftime(DTFORMAT, time.strptime(stamp, "%Y-%m-%dT%H:%M:%S"))
89
def download_file(item):
    """Make sure the audio file for ``item`` exists locally; return its path.

    The target path is ``webpath + outpdir + outpfn`` formatted with the
    item's info dict. If the file is already there it is reused; otherwise
    it is fetched with wget.

    Args:
        item: video info dict; must provide the keys used by ``outpfn``
            and a ``formats`` list of dicts with ``format_id`` and ``url``.

    Returns:
        The local path of the audio file.

    Raises:
        IndexError: no usable audio format is listed for the item.
        subprocess.CalledProcessError: wget exited with an error.
    """
    fn = webpath + outpdir + outpfn.format(**item)
    if os.path.isfile(fn):
        print(" File exists")
        return fn

    print(" Downloading")
    # Prefer the id the script originally looked for, then plain "140"
    # (the format the script requests from youtube-dl with `-f 140`).
    url = None
    for wanted in ("nondash-140", "140"):
        matches = [f["url"] for f in item["formats"] if f["format_id"] == wanted]
        if matches:
            url = matches[0]
            break
    if url is None:
        raise IndexError("no format 140 download URL found for %r" % (item.get("id"),))

    # Download to a temporary name and rename only on success. wget creates
    # its -O target even when the transfer fails, and a leftover file at
    # `fn` would be taken for a complete download by the check above on
    # every later run.
    partial = fn + ".part"
    try:
        output = subprocess.check_output(["/usr/bin/wget", url, "-O", partial, "-nv"])
    except subprocess.CalledProcessError:
        if os.path.exists(partial):
            os.remove(partial)
        raise
    os.rename(partial, fn)
    print("".join(output.splitlines()))
    return fn
100
def get_output(items):
    """Build the complete podcast feed XML for a list of video info dicts.

    Items are ordered newest first by their ``upload_date`` value. Each one
    is enriched in place (``item_info`` values, order, duration, publish
    date, file size, explicit flag, public URL), its audio file is
    downloaded if missing, and it is rendered with the ``ITEM`` template.
    The result is wrapped in the ``BASE`` / ``BASE2`` channel templates.

    Args:
        items: list of video info dicts as parsed from youtube-dl's JSON
            output. The dicts are modified in place.

    Returns:
        The feed document as a single string.
    """
    from xml.sax.saxutils import escape

    outp = []
    items = sorted(items, key=lambda k: k["upload_date"], reverse=True)
    for x, item in enumerate(items):
        item.update(item_info)

        # Format as unicode and reduce to ASCII for the console: under
        # Python 2 a byte-string format with a non-ASCII title raises
        # UnicodeEncodeError.
        label = u"Processing {fulltitle} ({id})".format(**item)
        print(label.encode("ascii", "replace").decode("ascii"))

        # duration arrives as a number of seconds; the feed wants H:MM:SS
        m, s = divmod(item["duration"], 60)
        h, m = divmod(m, 60)
        item["order"] = x + 1
        item["description"] = item["description"].replace(u"\u25ba", u">")
        item["duration"] = u"%d:%02d:%02d" % (h, m, s)
        item["upload_date"] = get_time(item["id"])
        item["size"] = str(os.path.getsize(download_file(item)))
        item["explicit"] = podcast["explicit"]
        item["lurl"] = webroot + outpdir + outpfn.format(**item)

        # Title and description come from YouTube and may contain &, < or >;
        # escape them in a copy so the feed stays well-formed XML while the
        # caller's dict keeps the raw text.
        fields = dict(item)
        for key in ("fulltitle", "description"):
            fields[key] = escape(fields[key])
        outp.append(ITEM.format(**fields))
        print(" Processed")
    print("Process complete")

    # The templates use named placeholders, so the dict has to be unpacked
    # into keyword arguments; passing it positionally raises KeyError.
    return BASE.format(**podcast) + "".join(outp) + BASE2.format(**podcast)
120
if __name__ == "__main__":
    # Script entry point: fetch playlist metadata with youtube-dl, build the
    # feed and write it to webpath + xmlfn.
    print("Getting playlist data from youtube, this can take a while if the playlist is large...")
    data = subprocess.check_output(['/usr/local/bin/youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
    print("Playlist data obtained, starting processing...")

    # youtube-dl -j prints one JSON object per line; parse each line on its
    # own and skip blank ones.
    entries = [json.loads(line) for line in data if line.strip()]

    # Build the whole document BEFORE opening the output file: opening it
    # first truncates the published feed, so any failure while processing
    # would leave an empty podcast.xml behind.
    # Encode as UTF-8 to match the encoding declared in the XML header
    # instead of silently dropping every non-ASCII character.
    feed = get_output(entries).encode("utf-8")
    with open(webpath + xmlfn, "wb") as f:
        f.write(feed)
127