# 4chan thread grabber
#
# Fetches a thread's JSON from the 4chan API, optionally downloads the images
# it references, and writes the posts to "<thread id>.html" in the current
# working directory.
import json
import urllib
from contextlib import closing

try:
    import urllib2
except ImportError:
    # Python 3: urllib2.urlopen and urllib.urlretrieve both live in
    # urllib.request.
    from urllib.request import urlopen as _urlopen
    from urllib.request import urlretrieve as _urlretrieve
else:
    _urlopen = urllib2.urlopen
    _urlretrieve = urllib.urlretrieve

# NOTE(review): these are the hosts the script was written against; confirm
# they still match the endpoints in the current 4chan API documentation.
API_URL = "https://api.4chan.org/%s/res/%s.json"
IMAGE_URL = "https://images.4chan.org/%s/src/%s"


def getThreadInfo(board, id):
    """Fetch and decode the JSON description of a thread.

    board is the board name (g, for example); id is the thread id.

    Returns the decoded JSON object, or None if the request or the decoding
    fails for any reason.
    """
    try:
        # closing() releases the connection on every path and, unlike using
        # the response itself as a context manager, also works on Python 2.
        with closing(_urlopen(API_URL % (board, id))) as response:
            payload = response.read()
        # Decode explicitly so the result does not depend on whether this
        # interpreter's json.loads() accepts bytes.
        return json.loads(payload.decode("utf-8"))
    except Exception:
        # Deliberately best-effort: callers test for None. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        return None


def getImageList(thread):
    """Collect the image details of every post that carries an image.

    thread is the thread json produced by getThreadInfo.

    Returns a dict keyed by post number ("no"). Each value is a dict with the
    post's "filename" and "ext" plus "id", the post's "tim" value as a string.
    Posts without a "tim" key are skipped.
    """
    images = {}
    for post in thread["posts"]:
        if "tim" in post:
            images[post["no"]] = {
                "filename": post["filename"],
                "ext": post["ext"],
                "id": str(post["tim"]),
            }
    return images


def downloadImage(board, imagename, filename=None):
    """Download one image of a board to a local file.

    imagename is the remote file name substituted into IMAGE_URL; filename is
    the local path to save to and defaults to imagename.

    Returns whatever urlretrieve returns (a (local filename, headers) tuple).
    """
    if not filename:
        filename = imagename
    return _urlretrieve(IMAGE_URL % (board, imagename), filename)


def downloadThread(board, id, showimages=True, getimages=True):
    """Save a thread as "<id>.html" in the current working directory.

    board is the board name and id the thread id (string or integer).
    showimages controls whether posts with an image get an image element;
    getimages controls whether those images are also downloaded next to the
    page (it has no effect when showimages is false).

    Raises IOError if the thread could not be fetched.
    """
    thread = getThreadInfo(board, id)
    if thread is None:
        # Fail with a clear message instead of a TypeError from indexing None.
        raise IOError("could not fetch thread %s from board %s" % (id, board))

    # The id is used in string concatenation below; accept integers as well.
    thread_id = str(id)

    first_post = thread["posts"][0]
    # Prefer the subject, fall back to the comment, and finally to the thread
    # id, because a post may carry neither "sub" nor "com".
    title = first_post.get("sub") or first_post.get("com") or thread_id

    images = getImageList(thread)
    sp = " "

    # NOTE(review): the markup inside the string literals of this function was
    # lost in the copy under review -- only whitespace, the two %s slots and
    # the format() keyword names (id, filename) survived. The tags below are a
    # minimal reconstruction; compare them with the intended page layout.
    #
    # The page is written as UTF-8 bytes so that non-ASCII post text is saved
    # the same way on every platform and Python version.
    with open(thread_id + ".html", "wb") as page:
        header = """<html>
<head>
<meta charset="utf-8">
<title>%s</title>
</head>
<body>
<h1>%s</h1>
""" % (title, title)
        page.write(header.encode("utf-8"))

        for post in thread["posts"]:
            number = post["no"]
            # "p<no>" is assumed to be the anchor format used by in-thread
            # links -- TODO confirm against real comment markup.
            parts = [sp + "<div class=\"post\" id=\"p{id}\">\n".format(id=str(number))]
            if number in images and showimages:
                parts.append(sp + "<div class=\"content\">\n")
                image = images[number]
                imagefile = image["id"] + image["ext"]
                if getimages:
                    downloadImage(board, imagefile)
                # The image is referenced by its local file name.
                parts.append(sp + " <img src=\"{filename}\">\n".format(filename=imagefile))
            else:
                parts.append(sp + "<div class=\"content\">\n")
            # A post may have no "com" key; treat that as an empty comment.
            # Removing the thread id that directly follows href=" leaves just
            # the remainder of the link target, presumably so links to other
            # posts resolve inside the saved page.
            comment = post.get("com", "").replace("href=\"" + thread_id, "href=\"")
            parts.append(sp + sp + comment + "\n")
            parts.append(sp + "</div>\n")
            parts.append(sp + "</div>\n")
            page.write("".join(parts).encode("utf-8"))

        page.write("""</body>
</html>
""".encode("utf-8"))