Steven Smith revised this gist . Go to revision
1 file changed, 74 insertions
4chanthreadgrabber.py(file created)
| @@ -0,0 +1,74 @@ | |||
| 1 | + | # 4chan thread grabber | |
| 2 | + | ||
| 3 | + | import urllib | |
| 4 | + | import urllib2 | |
| 5 | + | import json | |
| 6 | + | ||
# Endpoint templates for the read-only 4chan JSON API and its image CDN.
# The legacy hosts (api.4chan.org/<board>/res/<id>.json and
# images.4chan.org/<board>/src/<file>) have been retired; threads are served
# from a.4cdn.org and full-size images from i.4cdn.org.
# API_URL takes (board, thread id); IMAGE_URL takes (board, image file name).
API_URL = "https://a.4cdn.org/%s/thread/%s.json"
IMAGE_URL = "https://i.4cdn.org/%s/%s"
| 9 | + | ||
def getThreadInfo(board, id):
    """Fetch one thread from the API and return it as decoded JSON.

    board is the board name (g, for example); id is the thread id.

    Returns the decoded JSON object, or None when the request or the JSON
    decoding fails (the lookup is deliberately best-effort).
    """
    try:
        response = urllib2.urlopen(API_URL % (board, id))
        try:
            return json.loads(response.read())
        finally:
            # Always release the connection, even if read/decoding fails.
            response.close()
    except Exception:
        # Not a bare "except:": KeyboardInterrupt and SystemExit must still
        # propagate so the user can abort a hanging download.
        return None
| 17 | + | ||
def getImageList(thread):
    """Collect the attachment info of every post in a thread.

    thread is the thread json produced by getThreadInfo.

    Returns a dict keyed by post number ("no"). Only posts carrying a "tim"
    field are included; each value holds the post's "filename", its "ext"
    and the "tim" value converted to a string under the key "id".
    """
    images = {}
    for post in thread["posts"]:
        if "tim" not in post:
            # No "tim" field: this post is left out of the result.
            continue
        images[post["no"]] = dict(
            filename=post["filename"],
            ext=post["ext"],
            id=str(post["tim"]),
        )
    return images
| 28 | + | ||
def downloadImage(board, imagename, filename=None):
    """Download one image of a board to a local file.

    imagename is the remote file name that is substituted into IMAGE_URL;
    filename is the local path to save to and falls back to imagename when
    it is omitted or empty.

    Returns the result of urllib.urlretrieve.
    """
    target = filename or imagename
    source = IMAGE_URL % (board, imagename)
    return urllib.urlretrieve(source, target)
| 33 | + | ||
def _renderPost(post, images, board, threadid, showimages, getimages):
    """Return the HTML fragment for one post, downloading its image if asked."""
    sp = u" "
    hover = (u"onmouseover=\"this.style.backgroundColor = '#FFBBBB';\" "
             u"onmouseout=\"this.style.backgroundColor = '#BBBBBB';\"")
    number = post["no"]
    has_image = bool(showimages) and number in images

    parts = [sp + u"<a name=\"p{id}\">\n".format(id=number)]
    css = u"post image" if has_image else u"post"
    parts.append(sp + u" <div class=\"" + css + u"\" " + hover + u">\n")
    if has_image:
        image = images[number]
        filename = image["id"] + image["ext"]
        if getimages:
            downloadImage(board, filename)
        parts.append(
            sp + u" <a href=\"{filename}\"><img src=\"{filename}\" align=\"left\" style=\"height: 100px; width: 125px;\"></a>\n".format(filename=filename))
    # Posts without a comment have no "com" key at all; treat that as an
    # empty comment. Dropping the thread id from hrefs turns quote links
    # such as "<id>#p123" into "#p123", which match the <a name="p123">
    # anchors written on this page.
    comment = post.get("com", u"").replace(u"href=\"" + threadid, u"href=\"")
    parts.append(sp + sp + comment + u"\n")
    parts.append(sp + u" </div>\n")
    parts.append(sp + u"</a><hr>\n")
    return u"".join(parts)


def downloadThread(board, id, showimages=True, getimages=True):
    """Save a thread as "<id>.html" in the current directory.

    board is the board name (g, for example); id is the thread id and may be
    given as a string or an int.
    showimages: embed a linked thumbnail for every post that has an image.
    getimages: also download each embedded image next to the HTML file
    (only has an effect when showimages is true).

    Raises IOError if the thread cannot be fetched.
    """
    import io

    thread = getThreadInfo(board, id)
    if thread is None:
        # getThreadInfo signals failure with None; fail with a clear message
        # instead of a TypeError on the first subscript.
        raise IOError("could not fetch thread %s on board %s" % (id, board))

    # Text form of the id, so an int id works for the file name and the
    # href rewriting just like it does for the API URL.
    threadid = u"%s" % (id,)
    first = thread["posts"][0]
    # Use the subject when present, else the opening comment (which may be
    # missing as well).
    title = first["sub"] if "sub" in first else first.get("com", u"")
    images = getImageList(thread)

    header = u"""<html>
<head>
<style>
.post
{
background-color: #BBBBBB;
}
.image
{
min-height: 100px;
}
</style>
<title>%s</title>
</head>
<body>
<h2>%s</h2>
""" % (title, title)

    # The decoded JSON is unicode text; write it as UTF-8 explicitly so
    # non-ASCII comments do not raise UnicodeEncodeError under Python 2.
    with io.open(threadid + u".html", "w", encoding="utf-8") as page:
        page.write(header)
        for post in thread["posts"]:
            page.write(_renderPost(post, images, board, threadid,
                                   showimages, getimages))
        page.write(u"""
</body>
</html>""")
Newer
Older