Last active 1569718003

Generates an iTunes-compatible podcast feed from a YouTube playlist, storing the audio files in a local directory. Could be adapted to store the files in Dropbox or on another media provider if desired. | Requires Python 2.7, requests, wget, and youtube-dl | Configuration is at the top of the file

Revision 894b59d1d0d5b9188e3fc8b83da83ec4e4483cd4

ytPlaylistToPodcast.py Raw
1#!/usr/bin/env python2
2# This file is released as public domain by Steven Smith (blha303) in Apr 2015
3# In areas where public domain isn't a thing, I release it under the MIT license.
4# Although credit would be nice if you use this in something cool. And email me a link too pls.
5import time,os,requests,json,subprocess
6from urllib import urlretrieve
7
# Date format used for <lastBuildDate> and <pubDate>.
# Do not modify, needs to be at top.
# NOTE(review): RFC 822 dates put the day before the month
# ("%a, %d %b %Y ..."); left as-is per the instruction above -- confirm that
# the target podcast clients accept this ordering.
DTFORMAT = "%a, %b %d %Y %H:%M:%S +0000"

# --- Source playlist ---------------------------------------------------------
playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
number_to_get = 30
# generate your key via google's api dashboard. needs to have access to youtube's data api v3
# NOTE(review): avoid committing a real key to a public repository.
apikey = "AIzaSyAuwEEcpOGG230tm7na1KdO0tHFm2S_dIE"

# --- Where files are written and how they are served -------------------------
webroot = "http://domain.bla"     # public URL prefix
webpath = "/var/www"              # filesystem prefix that webroot is served from
outpdir = "/podcastdir"           # directory (below both prefixes) holding the output
outpfn = "/{id}.m4a"              # per-episode file name; {id} is filled from the item
xmlfn = outpdir + "/podcast.xml"  # feed path relative to webpath / webroot

# --- Channel-level values substituted into the BASE / BASE2 templates --------
podcast = dict(
    self=webroot + xmlfn,  # should point to xml
    title="A cool podcast",
    link="http://blha303.com.au",
    description="DAE podcast?",
    copyright="Copyright 2015 Youtube",
    # DTFORMAT hard-codes a "+0000" offset, so the timestamp has to be taken
    # in UTC; time.strftime() without a tuple would format local time.
    now=time.strftime(DTFORMAT, time.gmtime()),
    language="en-us",
    subtitle="Youtube is pretty cool, ey",
    author="Me",
    summary="Wip wap wop",
    owner_name="Me",
    owner_email="me@you.us",
    image=webroot + outpdir + "/podcast.png",
    category="yay",
    explicit="yes",  # or no
)

# --- Values merged into every episode before it is rendered with ITEM --------
item_info = dict(
    author="Me",
    summary="Just more info",
    category="Blablabla",
    keywords="autogen",
)
44
# Feed header: everything from the XML declaration up to the first <item>.
# Placeholders are filled from the `podcast` dict.
BASE = u"""<?xml version="1.0" encoding="utf-8"?>
 <rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
 <channel>
 <atom:link href="{self}" rel="self" type="application/rss+xml" />
 <title>{title}</title>
 <link>{link}</link>
 <description>{description}</description>
 <lastBuildDate>{now}</lastBuildDate>
 <language>{language}</language>
 <copyright>{copyright}</copyright>
 <itunes:subtitle>{subtitle}</itunes:subtitle>
 <itunes:author>{author}</itunes:author>
 <itunes:summary>{summary}</itunes:summary>
 <itunes:owner>
 <itunes:name>{owner_name}</itunes:name>
 <itunes:email>{owner_email}</itunes:email>
 </itunes:owner>
 <itunes:image href="{image}" />
 <itunes:category text="{category}" />
 """
# Feed footer: emitted after the last <item>; closes <channel> and <rss>.
BASE2 = u"""<itunes:explicit>{explicit}</itunes:explicit>
 </channel>
 </rss>
"""
# One episode. Placeholders are filled from the per-video dict.
# The enclosure is declared as audio because the files are stored with an
# .m4a extension; "video/mp4" mislabels them for podcast clients.
ITEM = u"""<item>
 <title>{fulltitle}</title>
 <link>https://www.youtube.com/watch?v={id}</link>
 <itunes:author>{author}</itunes:author>
 <description>{description}</description>
 <itunes:summary>{summary}</itunes:summary>
 <enclosure url="{lurl}" length="{size}" type="audio/x-m4a"/>
 <guid>{lurl}</guid>
 <pubDate>{upload_date}</pubDate>
 <itunes:order>{order}</itunes:order>
 <itunes:duration>{duration}</itunes:duration>
 <itunes:keywords>{keywords}</itunes:keywords>
 <category>{category}</category>
 <itunes:explicit>{explicit}</itunes:explicit>
 </item>
 """
85
def get_time(id):
    """Return the publication time of a video, formatted with DTFORMAT.

    Queries the YouTube Data API v3 ``videos`` endpoint for the video ``id``
    and reformats ``snippet.publishedAt``.

    Raises:
        requests.HTTPError: the API answered with an error status.
        IndexError: the API returned no entry for ``id``.
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos",
        params={'id': id, 'part': "snippet,statistics,recordingDetails", "key": apikey},
    )
    # Fail here with the HTTP status rather than later with an opaque KeyError.
    response.raise_for_status()
    entries = response.json()["items"]
    if not entries:
        raise IndexError("YouTube API returned no video for id %r" % (id,))
    published = entries[0]["snippet"]["publishedAt"]
    # Only the first 19 characters ("YYYY-MM-DDTHH:MM:SS") are parsed, so the
    # timestamp is accepted whether it ends in ".000Z", another fractional
    # part, or a bare "Z".
    parsed = time.strptime(published[:19], "%Y-%m-%dT%H:%M:%S")
    return time.strftime(DTFORMAT, parsed)
89
def download_file(item):
    """Make sure the audio file for ``item`` exists locally; return its path.

    The target path is ``webpath + outpdir + outpfn`` with ``outpfn``
    formatted from ``item``. If that file already exists nothing is
    downloaded. Otherwise the URL of the ``nondash-140`` entry in
    ``item["formats"]`` is fetched with wget.

    Raises:
        IndexError: ``item`` has no ``nondash-140`` format.
        subprocess.CalledProcessError: wget exited with a non-zero status.
    """
    fn = webpath + outpdir + outpfn.format(**item)
    if os.path.isfile(fn):
        print(" File exists")
        return fn

    print(" Downloading")
    urls = [fmt["url"] for fmt in item["formats"] if fmt["format_id"] == "nondash-140"]
    if not urls:
        raise IndexError("no 'nondash-140' format available for video %r" % (item.get("id"),))

    # Download next to the final path and rename on success. Writing straight
    # to `fn` would leave a truncated file behind when wget fails, and the
    # isfile() check above would then treat it as complete on the next run.
    partial = fn + ".part"
    try:
        output = subprocess.check_output(["/usr/bin/wget", urls[0], "-O", partial, "-nv"])
        os.rename(partial, fn)
    except Exception:
        if os.path.exists(partial):
            os.remove(partial)
        raise
    print("".join(output.splitlines()))
    return fn
100
def get_output(items):
    """Build the complete podcast XML document for ``items``.

    ``items`` is a list of per-video dicts (as parsed from youtube-dl's JSON
    output). Each one is processed newest first by ``upload_date``: it is
    merged with ``item_info``, its audio file is downloaded if missing, and it
    is rendered with the ITEM template. The dicts are modified in place.

    Returns:
        The feed as a unicode string: BASE + rendered items + BASE2.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    rendered = []
    newest_first = sorted(items, key=lambda k: k["upload_date"], reverse=True)
    for position, item in enumerate(newest_first, 1):
        item.update(item_info)
        # Format as unicode and encode explicitly: under Python 2 a byte-string
        # template raises UnicodeEncodeError for non-ASCII titles.
        print(u"Processing {fulltitle} ({id})".format(**item).encode("utf-8"))

        minutes, seconds = divmod(item["duration"], 60)
        hours, minutes = divmod(minutes, 60)
        item["order"] = position

        # Title and description are free text from YouTube; characters such as
        # "&" and "<" must be escaped or the resulting XML is malformed.
        item["fulltitle"] = escape(item["fulltitle"])
        # U+25BA is swapped for ">", which escape() then emits as "&gt;".
        description = (item["description"] or u"").replace(u"\u25ba", u">")
        item["description"] = escape(description)

        item["duration"] = u"%d:%02d:%02d" % (hours, minutes, seconds)
        item["upload_date"] = get_time(item["id"])
        item["size"] = str(os.path.getsize(download_file(item)))
        item["explicit"] = podcast["explicit"]
        item["lurl"] = webroot + outpdir + outpfn.format(**item)
        rendered.append(ITEM.format(**item))
        print(" Processed")
    print("Process complete")
    return BASE.format(**podcast) + u"".join(rendered) + BASE2.format(**podcast)
120
if __name__ == "__main__":
    # Script entry point: list the playlist with youtube-dl, build the feed,
    # and write it to webpath + xmlfn.
    print("Getting playlist data from youtube, this can take a while if the playlist is large...")
    raw = subprocess.check_output([
        '/usr/local/bin/youtube-dl', playlisturl,
        '--playlist-end', str(number_to_get),
        '--match-filter', 'duration > 300',
        '-f', '140',
        '-j',
    ])
    # With -j youtube-dl prints one JSON document per line.
    entries = [json.loads(line) for line in raw.splitlines() if line.strip()]
    print("Playlist data obtained, starting processing...")
    # Build the whole feed before opening the output file, so a failure while
    # processing does not leave a truncated feed on disk.
    feed = get_output(entries)
    # The XML declaration says utf-8, so encode as utf-8; encoding to ASCII
    # with errors ignored silently dropped every non-ASCII character.
    with open(webpath + xmlfn, "wb") as f:
        f.write(feed.encode("utf-8"))
127