#!/usr/bin/env python2
# This file is released as public domain by Steven Smith (blha303) in Apr 2015
# In areas where public domain isn't a thing, I release it under the MIT license.
# Although credit would be nice if you use this in something cool. And email me a link too pls.
import time, os, json, subprocess
from xml.sax.saxutils import escape

import requests

# RFC 2822-style date used for RSS pubDate values. Needs to be defined before the podcast dict below.
DTFORMAT = "%a, %d %b %Y %H:%M:%S +0000"

playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
number_to_get = 30
# Generate your key via Google's API dashboard; it needs access to the YouTube Data API v3.
apikey = "AIzaSyAuwEEcpOGG230tm7na1KdO0tHFm2S_dIE"

webroot = "http://domain.bla"   # public URL that serves webpath
webpath = "/var/www"            # local document root
outpdir = "/podcastdir"         # subdirectory (under both of the above) for audio files and the feed
outpfn = "/{id}.m4a"            # per-episode filename pattern
xmlfn = outpdir + "/podcast.xml"

podcast = dict(
    self = webroot + xmlfn,  # should point to the generated xml
    title = "A cool podcast",
    link = "http://blha303.com.au",
    description = "DAE podcast?",
    copyright = "Copyright 2015 Youtube",
    now = time.strftime(DTFORMAT),
    language = "en-us",
    subtitle = "Youtube is pretty cool, ey",
    author = "Me",
    summary = "Wip wap wop",
    owner_name = "Me",
    owner_email = "me@you.us",
    image = webroot + outpdir + "/podcast.png",
    category = "yay",
    explicit = "yes"  # or "no"
)

# Per-episode metadata that isn't available from youtube-dl.
item_info = dict(
    author = "Me",
    summary = "Just more info",
    category = "Blablabla",
    keywords = "autogen"
)

# Channel header: standard RSS 2.0 envelope with the iTunes podcast extensions.
BASE = u"""<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
    <atom:link href="{self}" rel="self" type="application/rss+xml" />
    <title>{title}</title>
    <link>{link}</link>
    <description>{description}</description>
    <pubDate>{now}</pubDate>
    <language>{language}</language>
    <copyright>{copyright}</copyright>
    <itunes:subtitle>{subtitle}</itunes:subtitle>
    <itunes:author>{author}</itunes:author>
    <itunes:summary>{summary}</itunes:summary>
    <itunes:owner>
        <itunes:name>{owner_name}</itunes:name>
        <itunes:email>{owner_email}</itunes:email>
    </itunes:owner>
"""

# Channel footer: artwork, category and explicit flag, then the closing tags.
BASE2 = u"""    <itunes:image href="{image}" />
    <itunes:category text="{category}" />
    <itunes:explicit>{explicit}</itunes:explicit>
</channel>
</rss>
"""

# One <item> per video; the downloaded audio is attached via the <enclosure> element.
ITEM = u"""    <item>
        <title>{fulltitle}</title>
        <link>https://www.youtube.com/watch?v={id}</link>
        <itunes:author>{author}</itunes:author>
        <description>{description}</description>
        <itunes:summary>{summary}</itunes:summary>
        <enclosure url="{lurl}" length="{size}" type="audio/mp4" />
        <guid>{lurl}</guid>
        <pubDate>{upload_date}</pubDate>
        <itunes:order>{order}</itunes:order>
        <itunes:duration>{duration}</itunes:duration>
        <itunes:keywords>{keywords}</itunes:keywords>
        <category>{category}</category>
        <itunes:explicit>{explicit}</itunes:explicit>
    </item>
"""

def get_time(id):
    """Return the video's publish date, formatted for RSS, via the YouTube Data API v3."""
    data = requests.get("https://www.googleapis.com/youtube/v3/videos",
                        params={"id": id,
                                "part": "snippet,statistics,recordingDetails",
                                "key": apikey}).json()
    return time.strftime(DTFORMAT, time.strptime(data["items"][0]["snippet"]["publishedAt"],
                                                 "%Y-%m-%dT%H:%M:%S.000Z"))

def download_file(item):
    """Download the m4a audio for this video if we don't have it yet; return the local path."""
    fn = webpath + outpdir + outpfn.format(**item)
    if not os.path.isfile(fn):
        print " Downloading"
        # youtube-dl labels the plain (non-DASH) m4a audio stream as format "nondash-140"
        url = [i for i in item["formats"] if i["format_id"] == "nondash-140"][0]["url"]
        # wget logs its progress to stderr, so capture that too and show the final summary line
        output = [l.strip() for l in subprocess.check_output(["wget", url, "-O", fn],
                                                             stderr=subprocess.STDOUT).splitlines()
                  if l.strip()]
        if output:
            print " " + output[-1]
    else:
        print " File exists"
    return fn

def get_output(items):
    """Build the complete podcast XML for the given list of youtube-dl video dicts."""
    outp = []
    items = sorted(items, key=lambda k: k["upload_date"], reverse=True)
    for x, item in enumerate(items):
        item.update(item_info)
        print "Processing {fulltitle} ({id})".format(**item)
        m, s = divmod(item["duration"], 60)
        h, m = divmod(m, 60)
        item["order"] = x + 1
        # Escape XML-special characters in the text that comes from YouTube,
        # and swap the u25ba arrows some uploaders use for a plain ">".
        item["fulltitle"] = escape(item["fulltitle"])
        item["description"] = escape(item["description"].replace(u"\u25ba", u">"))
        item["duration"] = u"%d:%02d:%02d" % (h, m, s)
        item["upload_date"] = get_time(item["id"])
        item["size"] = str(os.path.getsize(download_file(item)))
        item["explicit"] = podcast["explicit"]
        item["lurl"] = webroot + outpdir + outpfn.format(**item)
        outp.append(ITEM.format(**item))
        print " Processed"
    print "Process complete"
    return BASE.format(**podcast) + "".join(outp) + BASE2.format(**podcast)

if __name__ == "__main__":
    print "Getting playlist data from youtube, this can take a while if the playlist is large..."
    data = subprocess.check_output(["youtube-dl", playlisturl,
                                    "--playlist-end", str(number_to_get),
                                    "--match-filter", "duration > 300",
                                    "-f", "140", "-j"]).splitlines()
    print "Playlist data obtained, starting processing..."
    # Each line of youtube-dl's -j output is a standalone JSON object, one per video.
    items = [json.loads(line) for line in data]
    with open(webpath + xmlfn, "w") as f:
        f.write(get_output(items).encode("ascii", "ignore"))