ytPlaylistToPodcast.py
                        
                             · 4.9 KiB · Python
                        
                    
                    
                      
                        Raw
                      
                      
                        
                          
                        
                    
                    
                
                
            #!/usr/bin/env python2
# This file is released as public domain by Steven Smith (blha303) in Apr 2015
# In areas where public domain isn't a thing, I release it under the MIT license.
# Although credit would be nice if you use this in something cool. And email me a link too pls.
import time,os,requests,json,subprocess
from urllib import urlretrieve
# RFC 822 date layout, as RSS 2.0 requires for lastBuildDate/pubDate: the day
# of the month comes before the month name. The UTC offset is hard-coded to
# +0000, so every time rendered with this format must already be in UTC.
# Must stay above `podcast`, which uses it at import time.
DTFORMAT = "%a, %d %b %Y %H:%M:%S +0000"

# --- User settings -----------------------------------------------------------
playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ"
number_to_get = 30  # handed to youtube-dl as --playlist-end
# generate your key via google's api dashboard. needs to have access to youtube's data api v3
apikey = ""
webroot = "http://domain.bla"  # public URL prefix used for links inside the feed
webpath = "/var/www"           # filesystem prefix used when writing files
outpdir = "/podcastdir"        # directory appended to both webroot and webpath
outpfn  = "/{id}.m4a"          # per-episode file name; {id} is the video id
xmlfn = outpdir + "/podcast.xml"

# Channel-level values substituted into the BASE/BASE2 templates.
podcast = dict(
  self = webroot + xmlfn, # should point to xml
  title = "A cool podcast",
  link = "http://blha303.com.au",
  description = "DAE podcast?",
  copyright = "Copyright 2015 Youtube",
  # Rendered from UTC because DTFORMAT always claims a +0000 offset.
  now = time.strftime(DTFORMAT, time.gmtime()),
  language = "en-us",
  subtitle = "Youtube is pretty cool, ey",
  author = "Me",
  summary = "Wip wap wop",
  owner_name = "Me",
  owner_email = "me@you.us",
  image = webroot + outpdir + "/podcast.png",
  category = "yay",
  explicit = "yes" # or no
)

# Values merged into every episode before the ITEM template is rendered.
item_info = dict(
  author = "Me",
  summary = "Just more info",
  category = "Blablabla",
  keywords = "autogen"
)
# Feed header: XML declaration, <rss>/<channel> opening tags and the channel
# metadata. Placeholders are filled from the `podcast` dict.
BASE = u"""<?xml version="1.0" encoding="utf-8"?>
 <rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
 <channel>
 <atom:link href="{self}" rel="self" type="application/rss+xml" />
     <title>{title}</title>
     <link>{link}</link>
     <description>{description}</description>
     <lastBuildDate>{now}</lastBuildDate>
     <language>{language}</language>
     <copyright>{copyright}</copyright>
     <itunes:subtitle>{subtitle}</itunes:subtitle>
     <itunes:author>{author}</itunes:author>
     <itunes:summary>{summary}</itunes:summary>
     <itunes:owner>
         <itunes:name>{owner_name}</itunes:name>
         <itunes:email>{owner_email}</itunes:email>
     </itunes:owner>
     <itunes:image href="{image}" />
     <itunes:category text="{category}" />
     """
# Feed footer: emitted after all items; closes <channel> and <rss>.
BASE2 = u"""<itunes:explicit>{explicit}</itunes:explicit>
 </channel>
 </rss>
"""
# One episode. The enclosure points at the downloaded .m4a audio file, so it is
# declared with the audio MIME type rather than video/mp4.
ITEM = u"""<item>
         <title>{fulltitle}</title>
         <link>https://www.youtube.com/watch?v={id}</link>
         <itunes:author>{author}</itunes:author>
         <description>{description}</description>
         <itunes:summary>{summary}</itunes:summary>
         <enclosure url="{lurl}" length="{size}" type="audio/x-m4a"/>
         <guid>{lurl}</guid>
         <pubDate>{upload_date}</pubDate>
         <itunes:order>{order}</itunes:order>
         <itunes:duration>{duration}</itunes:duration>
         <itunes:keywords>{keywords}</itunes:keywords>
         <category>{category}</category>
         <itunes:explicit>{explicit}</itunes:explicit>
     </item>
     """
def get_time(id):
    """Return the publish time of YouTube video `id`, formatted with DTFORMAT.

    Queries the YouTube Data API v3 ``videos`` endpoint with the module-level
    `apikey` and reads ``snippet.publishedAt`` from the first returned item.

    Raises:
        requests.HTTPError: the API answered with an error status (for
            example because `apikey` is empty or invalid).
        KeyError, IndexError: the response contains no item for `id`.
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos",
        params={'id': id, 'part': "snippet,statistics,recordingDetails", "key": apikey},
    )
    # Fail with the HTTP error itself instead of a KeyError on "items" later.
    response.raise_for_status()
    published = response.json()["items"][0]["snippet"]["publishedAt"]
    # Parse only "YYYY-MM-DDTHH:MM:SS" (the first 19 characters) so the value
    # is accepted whether or not it carries a fractional-seconds suffix such
    # as ".000" before the trailing "Z".
    return time.strftime(DTFORMAT, time.strptime(published[:19], "%Y-%m-%dT%H:%M:%S"))
def download_file(item):
    """Make sure the audio file for `item` exists locally and return its path.

    `item` is one decoded youtube-dl JSON record. The target path is
    ``webpath + outpdir + outpfn`` with `outpfn` formatted from the record.
    If the file is already present nothing is downloaded; otherwise the URL
    of the record's "nondash-140" format is fetched with wget.

    The download goes to a temporary ``.part`` file that is renamed into
    place only after wget succeeds, so an interrupted or failed download is
    never mistaken for a finished one on the next run.

    Raises:
        IndexError: the record offers no "nondash-140" format.
        subprocess.CalledProcessError: wget exited with an error.
    """
    fn = webpath + outpdir + outpfn.format(**item)
    if os.path.isfile(fn):
        print(" File exists")
        return fn

    print(" Downloading")
    candidates = [i for i in item["formats"] if i["format_id"] == "nondash-140"]
    if not candidates:
        raise IndexError("no 'nondash-140' format available for video %s" % item["id"])
    url = candidates[0]["url"]

    partial = fn + ".part"
    try:
        output = subprocess.check_output(["/usr/bin/wget", url, "-O", partial, "-nv"])
        os.rename(partial, fn)
    finally:
        # Only still present when wget failed or was interrupted.
        if os.path.isfile(partial):
            os.remove(partial)
    print("".join(output.splitlines()))
    return fn
def _xml_escape(value):
    """Escape &, <, > and double quotes in a string; return other types unchanged."""
    from xml.sax.saxutils import escape
    if not isinstance(value, (str, type(u""))):
        return value
    return escape(value, {'"': "&quot;"})


def get_output(items):
    """Build the complete podcast RSS document for `items`.

    `items` is a list of decoded youtube-dl JSON records. They are ordered
    newest first by their "upload_date" value. Each record is updated in
    place with `item_info` and the derived fields (order, duration,
    upload_date, size, explicit, lurl); its audio file is downloaded through
    download_file() if needed. Returns BASE + all rendered ITEMs + BASE2 as
    one string.

    All string values are XML-escaped when rendered, so titles or
    descriptions containing "&" or "<" cannot break the feed.
    """
    outp = []
    items = sorted(items, key=lambda k: k["upload_date"], reverse=True)
    for x, item in enumerate(items):
        item.update(item_info)
        # Unicode template plus an ASCII round-trip: the progress line can be
        # printed on any console, even when the title has non-ASCII characters.
        label = u"Processing {fulltitle} ({id})".format(**item)
        print(label.encode("ascii", "replace").decode("ascii"))
        m, s = divmod(item["duration"], 60)
        h, m = divmod(m, 60)
        item["order"] = x + 1
        # A record may carry a null description; treat it as empty.
        item["description"] = (item["description"] or u"").replace(u"\u25ba", u">")
        item["duration"] = u"%d:%02d:%02d" % (h, m, s)
        item["upload_date"] = get_time(item["id"])
        item["size"] = str(os.path.getsize(download_file(item)))
        item["explicit"] = podcast["explicit"]
        item["lurl"] = webroot + outpdir + outpfn.format(**item)
        # Escape into a separate dict so `item` keeps its raw values.
        rendered = dict((key, _xml_escape(value)) for key, value in item.items())
        outp.append(ITEM.format(**rendered))
        print(" Processed")
    print("Process complete")
    channel = dict((key, _xml_escape(value)) for key, value in podcast.items())
    return BASE.format(**channel) + "".join(outp) + BASE2.format(**channel)
if __name__ == "__main__":
    # Script entry point: list the playlist with youtube-dl, build the feed
    # and write it to webpath + xmlfn.
    print("Getting playlist data from youtube, this can take a while if the playlist is large...")
    raw = subprocess.check_output(['/usr/local/bin/youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j'])
    print("Playlist data obtained, starting processing...")
    # youtube-dl -j prints one JSON object per line; decode each on its own
    # and skip blank lines.
    items = [json.loads(line.decode("utf-8")) for line in raw.splitlines() if line.strip()]
    # Build the whole document before opening the target: a failure while
    # downloading or querying the API must not truncate an existing feed.
    feed = get_output(items)
    with open(webpath + xmlfn, "wb") as f:
        # The XML declaration says utf-8, so write utf-8 rather than
        # silently dropping every non-ASCII character.
        f.write(feed.encode("utf-8"))
                | 1 | #!/usr/bin/env python2 | 
| 2 | # This file is released as public domain by Steven Smith (blha303) in Apr 2015 | 
| 3 | # In areas where public domain isn't a thing, I release it under the MIT license. | 
| 4 | # Although credit would be nice if you use this in something cool. And email me a link too pls. | 
| 5 | import time,os,requests,json,subprocess | 
| 6 | from urllib import urlretrieve | 
| 7 | |
| 8 | DTFORMAT = "%a, %b %d %Y %H:%M:%S +0000" # Do not modify, needs to be at top | 
| 9 | |
| 10 | playlisturl = "https://www.youtube.com/playlist?list=UU9CuvdOVfMPvKCiwdGKL3cQ" | 
| 11 | number_to_get = 30 | 
| 12 | # generate your key via google's api dashboard. needs to have access to youtube's data api v3 | 
| 13 | apikey = "" | 
| 14 | webroot = "http://domain.bla" | 
| 15 | webpath = "/var/www" | 
| 16 | outpdir = "/podcastdir" | 
| 17 | outpfn = "/{id}.m4a" | 
| 18 | xmlfn = outpdir + "/podcast.xml" | 
| 19 | |
| 20 | podcast = dict( | 
| 21 | self = webroot + xmlfn, # should point to xml | 
| 22 | title = "A cool podcast", | 
| 23 | link = "http://blha303.com.au", | 
| 24 | description = "DAE podcast?", | 
| 25 | copyright = "Copyright 2015 Youtube", | 
| 26 | now = time.strftime(DTFORMAT), | 
| 27 | language = "en-us", | 
| 28 | subtitle = "Youtube is pretty cool, ey", | 
| 29 | author = "Me", | 
| 30 | summary = "Wip wap wop", | 
| 31 | owner_name = "Me", | 
| 32 | owner_email = "me@you.us", | 
| 33 | image = webroot + outpdir + "/podcast.png", | 
| 34 | category = "yay", | 
| 35 | explicit = "yes" # or no | 
| 36 | ) | 
| 37 | |
| 38 | item_info = dict( | 
| 39 | author = "Me", | 
| 40 | summary = "Just more info", | 
| 41 | category = "Blablabla", | 
| 42 | keywords = "autogen" | 
| 43 | ) | 
| 44 | |
| 45 | BASE = u"""<?xml version="1.0" encoding="utf-8"?> | 
| 46 | <rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0"> | 
| 47 | <channel> | 
| 48 | <atom:link href="{self}" rel="self" type="application/rss+xml" /> | 
| 49 | <title>{title}</title> | 
| 50 | <link>{link}</link> | 
| 51 | <description>{description}</description> | 
| 52 | <lastBuildDate>{now}</lastBuildDate> | 
| 53 | <language>{language}</language> | 
| 54 | <copyright>{copyright}</copyright> | 
| 55 | <itunes:subtitle>{subtitle}</itunes:subtitle> | 
| 56 | <itunes:author>{author}</itunes:author> | 
| 57 | <itunes:summary>{summary}</itunes:summary> | 
| 58 | <itunes:owner> | 
| 59 | <itunes:name>{owner_name}</itunes:name> | 
| 60 | <itunes:email>{owner_email}</itunes:email> | 
| 61 | </itunes:owner> | 
| 62 | <itunes:image href="{image}" /> | 
| 63 | <itunes:category text="{category}" /> | 
| 64 | """ | 
| 65 | BASE2 = u"""<itunes:explicit>{explicit}</itunes:explicit> | 
| 66 | </channel> | 
| 67 | </rss> | 
| 68 | """ | 
| 69 | ITEM = u"""<item> | 
| 70 | <title>{fulltitle}</title> | 
| 71 | <link>https://www.youtube.com/watch?v={id}</link> | 
| 72 | <itunes:author>{author}</itunes:author> | 
| 73 | <description>{description}</description> | 
| 74 | <itunes:summary>{summary}</itunes:summary> | 
| 75 | <enclosure url="{lurl}" length="{size}" type="video/mp4"/> | 
| 76 | <guid>{lurl}</guid> | 
| 77 | <pubDate>{upload_date}</pubDate> | 
| 78 | <itunes:order>{order}</itunes:order> | 
| 79 | <itunes:duration>{duration}</itunes:duration> | 
| 80 | <itunes:keywords>{keywords}</itunes:keywords> | 
| 81 | <category>{category}</category> | 
| 82 | <itunes:explicit>{explicit}</itunes:explicit> | 
| 83 | </item> | 
| 84 | """ | 
| 85 | |
| 86 | def get_time(id): | 
| 87 | data = requests.get("https://www.googleapis.com/youtube/v3/videos", params={'id': id, 'part': "snippet,statistics,recordingDetails", "key": apikey}).json() | 
| 88 | return time.strftime(DTFORMAT, time.strptime(data["items"][0]["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%S.000Z")) | 
| 89 | |
| 90 | def download_file(item): | 
| 91 | fn = webpath + outpdir + outpfn.format(**item) | 
| 92 | if not os.path.isfile(fn): | 
| 93 | print " Downloading" | 
| 94 | url = [i for i in item["formats"] if i["format_id"] == "nondash-140"][0]["url"] | 
| 95 | print "".join(subprocess.check_output(["/usr/bin/wget", url, "-O", fn, "-nv"]).splitlines()) | 
| 96 | return fn | 
| 97 | else: | 
| 98 | print " File exists" | 
| 99 | return fn | 
| 100 | |
| 101 | def get_output(items): | 
| 102 | outp = [] | 
| 103 | items = sorted(items, key=lambda k: k["upload_date"], reverse=True) | 
| 104 | for x,item in enumerate(items): | 
| 105 | item.update(item_info) | 
| 106 | print "Processing {fulltitle} ({id})".format(**item) | 
| 107 | m,s = divmod(item["duration"], 60) | 
| 108 | h,m = divmod(m, 60) | 
| 109 | item["order"] = x+1 | 
| 110 | item["description"] = item["description"].replace(u"\u25ba", u">") | 
| 111 | item["duration"] = u"%d:%02d:%02d" % (h,m,s) | 
| 112 | item["upload_date"] = get_time(item["id"]) | 
| 113 | item["size"] = str(os.path.getsize(download_file(item))) | 
| 114 | item["explicit"] = podcast["explicit"] | 
| 115 | item["lurl"] = webroot + outpdir + outpfn.format(**item) | 
| 116 | outp.append(ITEM.format(**item)) | 
| 117 | print " Processed" | 
| 118 | print "Process complete" | 
| 119 | return BASE.format(**podcast) + "".join(outp) + BASE2.format(**podcast) | 
| 120 | |
| 121 | if __name__ == "__main__": | 
| 122 | print "Getting playlist data from youtube, this can take a while if the playlist is large..." | 
| 123 | data = subprocess.check_output(['/usr/local/bin/youtube-dl', playlisturl, '--playlist-end', str(number_to_get), '--match-filter', 'duration > 300', '-f', '140', '-j']).splitlines() | 
| 124 | print "Playlist data obtained, starting processing..." | 
| 125 | with open(webpath + xmlfn, "w") as f: | 
| 126 | f.write(get_output(json.loads(u"[" + u",".join(data) + u"]")).encode('ascii', 'ignore')) | 
| 127 |