ytPlaylistToPodcast.py
· 4.9 KiB · Python
Raw
#!/usr/bin/env python2
# This file is released as public domain by Steven Smith (blha303) in Apr 2015
# In areas where public domain isn't a thing, I release it under the MIT license.
# Although credit would be nice if you use this in something cool. And email me a link too pls.
import time,os,requests,json,subprocess
from urllib import urlretrieve
# Date layout for <lastBuildDate>/<pubDate>. RSS 2.0 requires RFC 822 dates,
# where the day of month comes BEFORE the month name ("Sat, 07 Sep 2002 ...").
# Needs to stay at the top: the `podcast` dict below uses it at import time.
DTFORMAT = "%a, %d %b %Y %H:%M:%S +0000"

# Playlist to mirror; number_to_get is handed to youtube-dl's --playlist-end.
playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
number_to_get = 30
# generate your key via google's api dashboard. needs to have access to youtube's data api v3
apikey = ""
# webroot is the public URL prefix, webpath the local directory it maps to.
webroot = "http://domain.bla"
webpath = "/var/www"
# The three paths below are appended to webroot (for URLs) and to webpath
# (for files on disk), so they must start with a slash.
outpdir = "/podcastdir"
outpfn = "/{id}.m4a"
xmlfn = outpdir + "/podcast.xml"

# Channel-level values substituted into the BASE / BASE2 templates.
podcast = dict(
    self=webroot + xmlfn,  # should point to xml
    title="A cool podcast",
    link="http://blha303.com.au",
    description="DAE podcast?",
    copyright="Copyright 2015 Youtube",
    # DTFORMAT hard-codes a +0000 offset, so the timestamp has to be UTC
    # rather than the machine's local time.
    now=time.strftime(DTFORMAT, time.gmtime()),
    language="en-us",
    subtitle="Youtube is pretty cool, ey",
    author="Me",
    summary="Wip wap wop",
    owner_name="Me",
    owner_email="me@you.us",
    image=webroot + outpdir + "/podcast.png",
    category="yay",
    explicit="yes",  # or no
)

# Per-episode values merged into every playlist entry before rendering ITEM.
item_info = dict(
    author="Me",
    summary="Just more info",
    category="Blablabla",
    keywords="autogen",
)
# Feed header: everything in <channel> that precedes the <item> elements.
# Placeholders are filled from the `podcast` dict.
BASE = u"""<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<atom:link href="{self}" rel="self" type="application/rss+xml" />
<title>{title}</title>
<link>{link}</link>
<description>{description}</description>
<lastBuildDate>{now}</lastBuildDate>
<language>{language}</language>
<copyright>{copyright}</copyright>
<itunes:subtitle>{subtitle}</itunes:subtitle>
<itunes:author>{author}</itunes:author>
<itunes:summary>{summary}</itunes:summary>
<itunes:owner>
<itunes:name>{owner_name}</itunes:name>
<itunes:email>{owner_email}</itunes:email>
</itunes:owner>
<itunes:image href="{image}" />
<itunes:category text="{category}" />
"""
# Feed footer: emitted after the last <item>; also filled from `podcast`.
BASE2 = u"""<itunes:explicit>{explicit}</itunes:explicit>
</channel>
</rss>
"""
# One episode. The enclosure is the audio-only .m4a file named by `outpfn`,
# so it is advertised with an audio MIME type rather than video/mp4.
ITEM = u"""<item>
<title>{fulltitle}</title>
<link>https://www.youtube.com/watch?v={id}</link>
<itunes:author>{author}</itunes:author>
<description>{description}</description>
<itunes:summary>{summary}</itunes:summary>
<enclosure url="{lurl}" length="{size}" type="audio/x-m4a"/>
<guid>{lurl}</guid>
<pubDate>{upload_date}</pubDate>
<itunes:order>{order}</itunes:order>
<itunes:duration>{duration}</itunes:duration>
<itunes:keywords>{keywords}</itunes:keywords>
<category>{category}</category>
<itunes:explicit>{explicit}</itunes:explicit>
</item>
"""
def get_time(id):
    """Return the publish time of a YouTube video, formatted with DTFORMAT.

    Queries the YouTube Data API v3 ``videos`` endpoint with the module-level
    ``apikey`` and reads ``snippet.publishedAt`` of the first returned item.

    Args:
        id: YouTube video id to look up.

    Returns:
        The publish timestamp re-formatted with ``DTFORMAT``.

    Raises:
        requests.HTTPError: the API answered with an error status
            (for example when ``apikey`` is empty or invalid).
        IndexError: the API returned no item for ``id``.
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos",
        params={'id': id, 'part': "snippet,statistics,recordingDetails", "key": apikey},
        timeout=30,  # do not hang the whole run on a stalled connection
    )
    # Surface API errors directly instead of failing later on a missing key.
    response.raise_for_status()
    entries = response.json().get("items") or []
    if not entries:
        raise IndexError("YouTube API returned no video for id %r" % (id,))
    published = entries[0]["snippet"]["publishedAt"]
    # The fractional-seconds part is not always ".000" and may be missing
    # entirely, so parse only the leading "YYYY-MM-DDTHH:MM:SS" (19 chars).
    return time.strftime(DTFORMAT, time.strptime(published[:19], "%Y-%m-%dT%H:%M:%S"))
def _pick_audio_url(item):
    """Return the direct download URL of the format-140 audio for ``item``."""
    formats = item.get("formats") or []
    # "nondash-140" is the id this script has always looked for; plain "140"
    # is accepted as well so a differently-labelled entry still works.
    for wanted in ("nondash-140", "140"):
        for fmt in formats:
            if fmt.get("format_id") == wanted and fmt.get("url"):
                return fmt["url"]
    # NOTE(review): presumably the top-level "url" is the format selected via
    # youtube-dl's -f option -- confirm against youtube-dl's JSON output.
    if item.get("url"):
        return item["url"]
    raise IndexError("no format 140 download URL found for video %s" % item.get("id"))


def download_file(item):
    """Make sure the audio file for ``item`` exists locally; return its path.

    The target path is ``webpath + outpdir + outpfn`` with ``outpfn``
    formatted from ``item``. An existing file is reused; otherwise the audio
    is fetched with wget.

    Args:
        item: dict describing one video (youtube-dl JSON entry).

    Returns:
        Filesystem path of the audio file.

    Raises:
        IndexError: no usable download URL is present in ``item``.
        subprocess.CalledProcessError: wget exited with a non-zero status.
    """
    fn = webpath + outpdir + outpfn.format(**item)
    if os.path.isfile(fn):
        print(" File exists")
        return fn

    print(" Downloading")
    url = _pick_audio_url(item)
    # wget creates its -O target even when the transfer fails. Download to a
    # temporary name and rename only on success, so a broken download is never
    # mistaken for a finished file on the next run.
    partial = fn + ".part"
    try:
        print("".join(subprocess.check_output(["/usr/bin/wget", url, "-O", partial, "-nv"]).splitlines()))
        os.rename(partial, fn)
    finally:
        if os.path.isfile(partial):
            os.remove(partial)
    return fn
def get_output(items):
    """Build the complete podcast RSS document for ``items``.

    Entries are ordered newest first by ``upload_date``. Each entry is merged
    with ``item_info``, given a 1-based ``order``, an ``H:MM:SS`` duration,
    a publish date from ``get_time`` and the size/URL of the audio file that
    ``download_file`` provides, then rendered through ``ITEM``.

    Args:
        items: iterable of dicts, one per video (youtube-dl JSON entries).
            The dicts are updated in place.

    Returns:
        The feed as one unicode string: ``BASE`` + rendered items + ``BASE2``.
    """
    # Local import: only this function needs it.
    from xml.sax.saxutils import escape

    outp = []
    items = sorted(items, key=lambda k: k["upload_date"], reverse=True)
    for x, item in enumerate(items):
        item.update(item_info)
        print("Processing {fulltitle} ({id})".format(**item))
        m, s = divmod(item["duration"], 60)
        h, m = divmod(m, 60)
        item["order"] = x + 1
        # A missing description is treated as empty instead of crashing.
        item["description"] = (item["description"] or u"").replace(u"\u25ba", u">")
        item["duration"] = u"%d:%02d:%02d" % (h, m, s)
        item["upload_date"] = get_time(item["id"])
        item["size"] = str(os.path.getsize(download_file(item)))
        item["explicit"] = podcast["explicit"]
        item["lurl"] = webroot + outpdir + outpfn.format(**item)
        # Title and description come from YouTube and may contain &, < or >;
        # escape them on a copy so the XML stays well-formed while the entry
        # itself keeps the raw text.
        fields = dict(item)
        fields["fulltitle"] = escape(item["fulltitle"])
        fields["description"] = escape(item["description"])
        outp.append(ITEM.format(**fields))
        print(" Processed")
    print("Process complete")
    return BASE.format(**podcast) + "".join(outp) + BASE2.format(**podcast)
# Script entry point: fetch playlist metadata with youtube-dl, then write the feed.
if __name__ == "__main__":
    print("Getting playlist data from youtube, this can take a while if the playlist is large...")
    data = subprocess.check_output(['/usr/local/bin/youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
    print("Playlist data obtained, starting processing...")
    # Each output line is parsed as its own JSON document; blank lines are
    # skipped instead of corrupting one big joined array.
    entries = [json.loads(line) for line in data if line.strip()]
    # Render before opening the output file, so a failure while processing
    # does not truncate an existing feed. The XML header declares utf-8, so
    # encode as utf-8 rather than dropping every non-ASCII character.
    feed = get_output(entries).encode('utf-8')
    with open(webpath + xmlfn, "wb") as f:
        f.write(feed)
| 1 | #!/usr/bin/env python2 |
| 2 | # This file is released as public domain by Steven Smith (blha303) in Apr 2015 |
| 3 | # In areas where public domain isn't a thing, I release it under the MIT license. |
| 4 | # Although credit would be nice if you use this in something cool. And email me a link too pls. |
| 5 | import time,os,requests,json,subprocess |
| 6 | from urllib import urlretrieve |
| 7 | |
# Date layout for <lastBuildDate>/<pubDate>. RSS 2.0 requires RFC 822 dates,
# where the day of month comes BEFORE the month name ("Sat, 07 Sep 2002 ...").
# Needs to stay at the top: the `podcast` dict below uses it at import time.
DTFORMAT = "%a, %d %b %Y %H:%M:%S +0000"

# Playlist to mirror; number_to_get is handed to youtube-dl's --playlist-end.
playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
number_to_get = 30
# generate your key via google's api dashboard. needs to have access to youtube's data api v3
apikey = ""
# webroot is the public URL prefix, webpath the local directory it maps to.
webroot = "http://domain.bla"
webpath = "/var/www"
# The three paths below are appended to webroot (for URLs) and to webpath
# (for files on disk), so they must start with a slash.
outpdir = "/podcastdir"
outpfn = "/{id}.m4a"
xmlfn = outpdir + "/podcast.xml"

# Channel-level values substituted into the BASE / BASE2 templates.
podcast = dict(
    self=webroot + xmlfn,  # should point to xml
    title="A cool podcast",
    link="http://blha303.com.au",
    description="DAE podcast?",
    copyright="Copyright 2015 Youtube",
    # DTFORMAT hard-codes a +0000 offset, so the timestamp has to be UTC
    # rather than the machine's local time.
    now=time.strftime(DTFORMAT, time.gmtime()),
    language="en-us",
    subtitle="Youtube is pretty cool, ey",
    author="Me",
    summary="Wip wap wop",
    owner_name="Me",
    owner_email="me@you.us",
    image=webroot + outpdir + "/podcast.png",
    category="yay",
    explicit="yes",  # or no
)

# Per-episode values merged into every playlist entry before rendering ITEM.
item_info = dict(
    author="Me",
    summary="Just more info",
    category="Blablabla",
    keywords="autogen",
)
| 44 | |
# Feed header: everything in <channel> that precedes the <item> elements.
# Placeholders are filled from the `podcast` dict.
BASE = u"""<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<atom:link href="{self}" rel="self" type="application/rss+xml" />
<title>{title}</title>
<link>{link}</link>
<description>{description}</description>
<lastBuildDate>{now}</lastBuildDate>
<language>{language}</language>
<copyright>{copyright}</copyright>
<itunes:subtitle>{subtitle}</itunes:subtitle>
<itunes:author>{author}</itunes:author>
<itunes:summary>{summary}</itunes:summary>
<itunes:owner>
<itunes:name>{owner_name}</itunes:name>
<itunes:email>{owner_email}</itunes:email>
</itunes:owner>
<itunes:image href="{image}" />
<itunes:category text="{category}" />
"""
# Feed footer: emitted after the last <item>; also filled from `podcast`.
BASE2 = u"""<itunes:explicit>{explicit}</itunes:explicit>
</channel>
</rss>
"""
# One episode. The enclosure is the audio-only .m4a file named by `outpfn`,
# so it is advertised with an audio MIME type rather than video/mp4.
ITEM = u"""<item>
<title>{fulltitle}</title>
<link>https://www.youtube.com/watch?v={id}</link>
<itunes:author>{author}</itunes:author>
<description>{description}</description>
<itunes:summary>{summary}</itunes:summary>
<enclosure url="{lurl}" length="{size}" type="audio/x-m4a"/>
<guid>{lurl}</guid>
<pubDate>{upload_date}</pubDate>
<itunes:order>{order}</itunes:order>
<itunes:duration>{duration}</itunes:duration>
<itunes:keywords>{keywords}</itunes:keywords>
<category>{category}</category>
<itunes:explicit>{explicit}</itunes:explicit>
</item>
"""
| 85 | |
def get_time(id):
    """Return the publish time of a YouTube video, formatted with DTFORMAT.

    Queries the YouTube Data API v3 ``videos`` endpoint with the module-level
    ``apikey`` and reads ``snippet.publishedAt`` of the first returned item.

    Args:
        id: YouTube video id to look up.

    Returns:
        The publish timestamp re-formatted with ``DTFORMAT``.

    Raises:
        requests.HTTPError: the API answered with an error status
            (for example when ``apikey`` is empty or invalid).
        IndexError: the API returned no item for ``id``.
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos",
        params={'id': id, 'part': "snippet,statistics,recordingDetails", "key": apikey},
        timeout=30,  # do not hang the whole run on a stalled connection
    )
    # Surface API errors directly instead of failing later on a missing key.
    response.raise_for_status()
    entries = response.json().get("items") or []
    if not entries:
        raise IndexError("YouTube API returned no video for id %r" % (id,))
    published = entries[0]["snippet"]["publishedAt"]
    # The fractional-seconds part is not always ".000" and may be missing
    # entirely, so parse only the leading "YYYY-MM-DDTHH:MM:SS" (19 chars).
    return time.strftime(DTFORMAT, time.strptime(published[:19], "%Y-%m-%dT%H:%M:%S"))
| 89 | |
def _pick_audio_url(item):
    """Return the direct download URL of the format-140 audio for ``item``."""
    formats = item.get("formats") or []
    # "nondash-140" is the id this script has always looked for; plain "140"
    # is accepted as well so a differently-labelled entry still works.
    for wanted in ("nondash-140", "140"):
        for fmt in formats:
            if fmt.get("format_id") == wanted and fmt.get("url"):
                return fmt["url"]
    # NOTE(review): presumably the top-level "url" is the format selected via
    # youtube-dl's -f option -- confirm against youtube-dl's JSON output.
    if item.get("url"):
        return item["url"]
    raise IndexError("no format 140 download URL found for video %s" % item.get("id"))


def download_file(item):
    """Make sure the audio file for ``item`` exists locally; return its path.

    The target path is ``webpath + outpdir + outpfn`` with ``outpfn``
    formatted from ``item``. An existing file is reused; otherwise the audio
    is fetched with wget.

    Args:
        item: dict describing one video (youtube-dl JSON entry).

    Returns:
        Filesystem path of the audio file.

    Raises:
        IndexError: no usable download URL is present in ``item``.
        subprocess.CalledProcessError: wget exited with a non-zero status.
    """
    fn = webpath + outpdir + outpfn.format(**item)
    if os.path.isfile(fn):
        print(" File exists")
        return fn

    print(" Downloading")
    url = _pick_audio_url(item)
    # wget creates its -O target even when the transfer fails. Download to a
    # temporary name and rename only on success, so a broken download is never
    # mistaken for a finished file on the next run.
    partial = fn + ".part"
    try:
        print("".join(subprocess.check_output(["/usr/bin/wget", url, "-O", partial, "-nv"]).splitlines()))
        os.rename(partial, fn)
    finally:
        if os.path.isfile(partial):
            os.remove(partial)
    return fn
| 100 | |
def get_output(items):
    """Build the complete podcast RSS document for ``items``.

    Entries are ordered newest first by ``upload_date``. Each entry is merged
    with ``item_info``, given a 1-based ``order``, an ``H:MM:SS`` duration,
    a publish date from ``get_time`` and the size/URL of the audio file that
    ``download_file`` provides, then rendered through ``ITEM``.

    Args:
        items: iterable of dicts, one per video (youtube-dl JSON entries).
            The dicts are updated in place.

    Returns:
        The feed as one unicode string: ``BASE`` + rendered items + ``BASE2``.
    """
    # Local import: only this function needs it.
    from xml.sax.saxutils import escape

    outp = []
    items = sorted(items, key=lambda k: k["upload_date"], reverse=True)
    for x, item in enumerate(items):
        item.update(item_info)
        print("Processing {fulltitle} ({id})".format(**item))
        m, s = divmod(item["duration"], 60)
        h, m = divmod(m, 60)
        item["order"] = x + 1
        # A missing description is treated as empty instead of crashing.
        item["description"] = (item["description"] or u"").replace(u"\u25ba", u">")
        item["duration"] = u"%d:%02d:%02d" % (h, m, s)
        item["upload_date"] = get_time(item["id"])
        item["size"] = str(os.path.getsize(download_file(item)))
        item["explicit"] = podcast["explicit"]
        item["lurl"] = webroot + outpdir + outpfn.format(**item)
        # Title and description come from YouTube and may contain &, < or >;
        # escape them on a copy so the XML stays well-formed while the entry
        # itself keeps the raw text.
        fields = dict(item)
        fields["fulltitle"] = escape(item["fulltitle"])
        fields["description"] = escape(item["description"])
        outp.append(ITEM.format(**fields))
        print(" Processed")
    print("Process complete")
    return BASE.format(**podcast) + "".join(outp) + BASE2.format(**podcast)
| 120 | |
# Script entry point: fetch playlist metadata with youtube-dl, then write the feed.
if __name__ == "__main__":
    print("Getting playlist data from youtube, this can take a while if the playlist is large...")
    data = subprocess.check_output(['/usr/local/bin/youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines()
    print("Playlist data obtained, starting processing...")
    # Each output line is parsed as its own JSON document; blank lines are
    # skipped instead of corrupting one big joined array.
    entries = [json.loads(line) for line in data if line.strip()]
    # Render before opening the output file, so a failure while processing
    # does not truncate an existing feed. The XML header declares utf-8, so
    # encode as utf-8 rather than dropping every non-ASCII character.
    feed = get_output(entries).encode('utf-8')
    with open(webpath + xmlfn, "wb") as f:
        f.write(feed)
| 127 |