Last active 1569718003 (Unix time; 2019-09-29 00:46:43 UTC)

Generates an iTunes-compatible podcast feed from a YouTube playlist, storing the audio files in a local directory. It could be adapted to store the files in Dropbox or with another media provider if desired. | Requires Python 2.7, requests, wget, and youtube-dl. | Configuration is at the top of the file.

Steven Smith revised this gist at 1469631334 (Unix time; 2016-07-27 14:55:34 UTC). Go to revision

1 file changed, 1 insertion, 1 deletion

ytPlaylistToPodcast.py

@@ -10,7 +10,7 @@ DTFORMAT = "%a, %b %d %Y %H:%M:%S +0000" # Do not modify, needs to be at top
10 10 playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
11 11 number_to_get = 30
12 12 # generate your key via google's api dashboard. needs to have access to youtube's data api v3
13 - apikey = "AIzaSyAuwEEcpOGG230tm7na1KdO0tHFm2S_dIE"
13 + apikey = ""
14 14 webroot = "http://domain.bla"
15 15 webpath = "/var/www"
16 16 outpdir = "/podcastdir"

Steven Smith revised this gist at 1430723838 (Unix time; 2015-05-04 07:17:18 UTC). Go to revision

1 file changed, 2 insertions, 2 deletions

ytPlaylistToPodcast.py

@@ -92,7 +92,7 @@ def download_file(item):
92 92 if not os.path.isfile(fn):
93 93 print " Downloading"
94 94 url = [i for i in item["formats"] if i["format_id"] == "nondash-140"][0]["url"]
95 - print "".join(subprocess.check_output(["wget", url, "-O", fn]).splitlines()[-5])
95 + print "".join(subprocess.check_output(["/usr/bin/wget", url, "-O", fn, "-nv"]).splitlines())
96 96 return fn
97 97 else:
98 98 print " File exists"
@@ -120,7 +120,7 @@ def get_output(items):
120 120
121 121 if __name__ == "__main__":
122 122 print "Getting playlist data from youtube, this can take a while if the playlist is large..."
123 - data = subprocess.check_output(['youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
123 + data = subprocess.check_output(['/usr/local/bin/youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
124 124 print "Playlist data obtained, starting processing..."
125 125 with open(webpath + xmlfn, "w") as f:
126 126 f.write(get_output(json.loads(u"[" + u",".join(data) + u"]")).encode('ascii', 'ignore'))

Steven Smith revised this gist at 1430581002 (Unix time; 2015-05-02 15:36:42 UTC). Go to revision

1 file changed, 2 insertions, 1 deletion

ytPlaylistToPodcast.py

@@ -8,6 +8,7 @@ from urllib import urlretrieve
8 8 DTFORMAT = "%a, %b %d %Y %H:%M:%S +0000" # Do not modify, needs to be at top
9 9
10 10 playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
11 + number_to_get = 30
11 12 # generate your key via google's api dashboard. needs to have access to youtube's data api v3
12 13 apikey = "AIzaSyAuwEEcpOGG230tm7na1KdO0tHFm2S_dIE"
13 14 webroot = "http://domain.bla"
@@ -119,7 +120,7 @@ def get_output(items):
119 120
120 121 if __name__ == "__main__":
121 122 print "Getting playlist data from youtube, this can take a while if the playlist is large..."
122 - data = subprocess.check_output(['youtube-dl', playlisturl, '--playlist-end', '30', '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
123 + data = subprocess.check_output(['youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
123 124 print "Playlist data obtained, starting processing..."
124 125 with open(webpath + xmlfn, "w") as f:
125 126 f.write(get_output(json.loads(u"[" + u",".join(data) + u"]")).encode('ascii', 'ignore'))

Steven Smith revised this gist at 1430578749 (Unix time; 2015-05-02 14:59:09 UTC). Go to revision

1 file changed, 125 insertions

ytPlaylistToPodcast.py(file created)

@@ -0,0 +1,125 @@
1 + #!/usr/bin/env python2
2 + # This file is released as public domain by Steven Smith (blha303) in Apr 2015
3 + # In areas where public domain isn't a thing, I release it under the MIT license.
4 + # Although credit would be nice if you use this in something cool. And email me a link too pls.
5 + import time,os,requests,json,subprocess
6 + from urllib import urlretrieve
7 +
8 + DTFORMAT = "%a, %b %d %Y %H:%M:%S +0000" # Do not modify, needs to be at top
9 +
10 + playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
11 + # generate your key via google's api dashboard. needs to have access to youtube's data api v3
12 + apikey = "AIzaSyAuwEEcpOGG230tm7na1KdO0tHFm2S_dIE"
13 + webroot = "http://domain.bla"
14 + webpath = "/var/www"
15 + outpdir = "/podcastdir"
16 + outpfn = "/{id}.m4a"
17 + xmlfn = outpdir + "/podcast.xml"
18 +
19 + podcast = dict(
20 + self = webroot + xmlfn, # should point to xml
21 + title = "A cool podcast",
22 + link = "http://blha303.com.au",
23 + description = "DAE podcast?",
24 + copyright = "Copyright 2015 Youtube",
25 + now = time.strftime(DTFORMAT),
26 + language = "en-us",
27 + subtitle = "Youtube is pretty cool, ey",
28 + author = "Me",
29 + summary = "Wip wap wop",
30 + owner_name = "Me",
31 + owner_email = "me@you.us",
32 + image = webroot + outpdir + "/podcast.png",
33 + category = "yay",
34 + explicit = "yes" # or no
35 + )
36 +
37 + item_info = dict(
38 + author = "Me",
39 + summary = "Just more info",
40 + category = "Blablabla",
41 + keywords = "autogen"
42 + )
43 +
44 + BASE = u"""<?xml version="1.0" encoding="utf-8"?>
45 + <rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
46 + <channel>
47 + <atom:link href="{self}" rel="self" type="application/rss+xml" />
48 + <title>{title}</title>
49 + <link>{link}</link>
50 + <description>{description}</description>
51 + <lastBuildDate>{now}</lastBuildDate>
52 + <language>{language}</language>
53 + <copyright>{copyright}</copyright>
54 + <itunes:subtitle>{subtitle}</itunes:subtitle>
55 + <itunes:author>{author}</itunes:author>
56 + <itunes:summary>{summary}</itunes:summary>
57 + <itunes:owner>
58 + <itunes:name>{owner_name}</itunes:name>
59 + <itunes:email>{owner_email}</itunes:email>
60 + </itunes:owner>
61 + <itunes:image href="{image}" />
62 + <itunes:category text="{category}" />
63 + """
64 + BASE2 = u"""<itunes:explicit>{explicit}</itunes:explicit>
65 + </channel>
66 + </rss>
67 + """
68 + ITEM = u"""<item>
69 + <title>{fulltitle}</title>
70 + <link>https://www.youtube.com/watch?v={id}</link>
71 + <itunes:author>{author}</itunes:author>
72 + <description>{description}</description>
73 + <itunes:summary>{summary}</itunes:summary>
74 + <enclosure url="{lurl}" length="{size}" type="video/mp4"/>
75 + <guid>{lurl}</guid>
76 + <pubDate>{upload_date}</pubDate>
77 + <itunes:order>{order}</itunes:order>
78 + <itunes:duration>{duration}</itunes:duration>
79 + <itunes:keywords>{keywords}</itunes:keywords>
80 + <category>{category}</category>
81 + <itunes:explicit>{explicit}</itunes:explicit>
82 + </item>
83 + """
84 +
85 + def get_time(id):
86 + data = requests.get("https://www.googleapis.com/youtube/v3/videos", params={'id': id, 'part': "snippet,statistics,recordingDetails", "key": apikey}).json()
87 + return time.strftime(DTFORMAT, time.strptime(data["items"][0]["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%S.000Z"))
88 +
89 + def download_file(item):
90 + fn = webpath + outpdir + outpfn.format(**item)
91 + if not os.path.isfile(fn):
92 + print " Downloading"
93 + url = [i for i in item["formats"] if i["format_id"] == "nondash-140"][0]["url"]
94 + print "".join(subprocess.check_output(["wget", url, "-O", fn]).splitlines()[-5])
95 + return fn
96 + else:
97 + print " File exists"
98 + return fn
99 +
100 + def get_output(items):
101 + outp = []
102 + items = sorted(items, key=lambda k: k["upload_date"], reverse=True)
103 + for x,item in enumerate(items):
104 + item.update(item_info)
105 + print "Processing {fulltitle} ({id})".format(**item)
106 + m,s = divmod(item["duration"], 60)
107 + h,m = divmod(m, 60)
108 + item["order"] = x+1
109 + item["description"] = item["description"].replace(u"\u25ba", u"&gt;")
110 + item["duration"] = u"%d:%02d:%02d" % (h,m,s)
111 + item["upload_date"] = get_time(item["id"])
112 + item["size"] = str(os.path.getsize(download_file(item)))
113 + item["explicit"] = podcast["explicit"]
114 + item["lurl"] = webroot + outpdir + outpfn.format(**item)
115 + outp.append(ITEM.format(**item))
116 + print " Processed"
117 + print "Process complete"
118 + return BASE.format(**podcast) + "".join(outp) + BASE2.format(**podcast)
119 +
120 + if __name__ == "__main__":
121 + print "Getting playlist data from youtube, this can take a while if the playlist is large..."
122 + data = subprocess.check_output(['youtube-dl', playlisturl, '--playlist-end', '30', '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
123 + print "Playlist data obtained, starting processing..."
124 + with open(webpath + xmlfn, "w") as f:
125 + f.write(get_output(json.loads(u"[" + u",".join(data) + u"]")).encode('ascii', 'ignore'))
Newer Older