# RSS feed rewriter
# For adding more content from the target page that isn't included normally.
# For an example output, compare
#   http://www.escapistmagazine.com/rss/videos/list/1.xml
# and
#   http://irc.lazle.co/xml/zeropunctuation.xml
# I added Escapist's embed code loaded from their site in the item description.
# A maybe better example is
#   http://www.escapistmagazine.com/rss/articles/columns/extra-punctuation.xml
# and
#   http://irc.lazle.co/xml/extrapunctuation.xml
# I wanted the whole article text in my feed reader :D
# Example usage: https://gist.github.com/blha303/36a1c9eef45cf75df904
# (from a much earlier version of the script. fully functional, but there are
# differences between them)
# Future plans:
#  - Need to make this more accessible for others
#  - Clean up
# Copyright 2013 Steven Smith (blha303). All Rights Reserved.
# New BSD license
# http://www.opensource.org/licenses/BSD-3-Clause

from __future__ import print_function

import sys
from time import sleep
from urllib import urlopen  # NOTE(review): unused here; Python 2 only

import yql  # I like how simple feed importing is with yql
from bs4 import BeautifulSoup as Soup

yql_env = "http://datatables.org/alltables.env"
YQL = yql.Public()


def main(feedurl, title="A feed!", link="http://google.com", desc=":)",
         debug=False, aprint=False):
    """Fetch *feedurl* through YQL and rebuild it as a new RSS 2.0 document.

    Parameters:
        feedurl: URL of the source RSS feed to pull rows from.
        title, link, desc: channel metadata for the rewritten feed.
        debug: print progress/diagnostic messages while building.
        aprint: print each produced XML fragment as it is generated.

    Returns:
        The rewritten feed as a string, the cached file contents when the
        source feed has no new items, or "No response?" when YQL returns
        no rows.
    """
    # Location the output of this script is being written to; used for
    # checking for feed updates. Copy in the top of the original XML file
    # if you're not sure.
    filename = ""

    # NOTE(review): the XML tags of these template literals were stripped
    # at some point (the surviving ".replace('&', '&')" no-op shows the
    # source was HTML-unescaped). Reconstructed as a standard RSS 2.0
    # envelope -- verify against the original output at
    # http://irc.lazle.co/xml/zeropunctuation.xml
    base = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>{title}</title>
<link>{link}</link>
<description>{description}</description>
<language>en-us</language>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
""".format(title=title, link=link, description=desc)
    end = """</channel>
</rss>
"""

    # Fix: olddata was unbound when filename == "" but is returned from the
    # "no new articles" branch below; default it to the empty string.
    olddata = ""
    if filename != "":
        with open(filename, "r") as f:
            olddata = f.read()
        oldsoup = Soup(olddata)
    else:
        oldsoup = None

    # add more fields to retrieve if needed
    query = ("SELECT title,link,description,pubDate,category "
             "from rss where url=@feed")
    result = YQL.execute(query, {"feed": feedurl}, env=yql_env)
    if not result.rows:
        return "No response?"

    if oldsoup:
        first_item = oldsoup.find('item')
        if first_item and \
                first_item.find('title').text == result.rows[0]["title"]:
            # No new articles -- return the cached output unchanged.
            if debug:
                print("No new articles")
            if aprint:
                print(str(olddata))
            return str(olddata)

    if debug:
        print(result.rows)
    if aprint:
        print(base)

    items = []
    for row in result.rows:
        if debug:
            print("Description for " + row["title"])
        # use beautifulsoup or something here to retrieve the info you want
        description = row['description']
        # Fix: ".replace('&', '&')" was a no-op left over from un-escaping;
        # escape ampersands so the output stays well-formed XML.
        # Add more tags below if needed in the dest feed.
        items.append("""<item>
<title>{title}</title>
<link>{url}</link>
<guid>{url}</guid>
<description>{description}</description>
<pubDate>{date}</pubDate>
<category>{category}</category>
</item>
""".format(title=row["title"].replace("&", "&amp;"),
           url=row["link"].split("?")[0],
           description=description.replace("&", "&amp;"),
           date=row["pubDate"],
           category=row["category"]))
        if debug:
            # NOTE(review): the original printed the channel *title*
            # parameter here, not the row's title; kept as-is.
            print("Finished " + title)
        if aprint:
            print(items[-1])
        sleep(1)  # to avoid hammering the site in description lookups

    if aprint:
        print(end)
    return base + "".join(items) + end


if __name__ == "__main__":
    # NOTE(review): the original indentation was lost, so whether the final
    # else paired with the inner or outer if is ambiguous; no-argument
    # invocation running the default feed is the reading taken here.
    if len(sys.argv) > 1:
        if sys.argv[1] == "debug":
            print(main("http://irc.lazle.co/xml/zeropunctuation.xml",
                       debug=True))
        elif sys.argv[1] == "aprint":
            main("http://irc.lazle.co/xml/zeropunctuation.xml", aprint=True)
    else:
        print(main("http://irc.lazle.co/xml/zeropunctuation.xml"))