Last active 1450763335

4chanthreadgrabber.py Raw
1# 4chan thread grabber
2
3import urllib
4import urllib2
5import json
6
7API_URL = "https://api.4chan.org/%s/res/%s.json"
8IMAGE_URL = "https://images.4chan.org/%s/src/%s"
9
def getThreadInfo(board, id):
    """Fetch and decode the JSON description of one thread.

    board is the board name (g, for example); id is the thread id.
    Returns the decoded JSON object, or None if the request or the
    decoding failed for any reason.
    """
    url = API_URL % (board, id)
    try:
        response = urllib2.urlopen(url)
        try:
            return json.loads(response.read())
        finally:
            # Release the connection whether or not decoding succeeded.
            response.close()
    except Exception:
        # Best-effort contract: every failure is reported as None.
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return None
17
def getImageList(thread):
    """Collect the attachments of a thread, keyed by post number.

    thread is the thread json produced by getThreadInfo. Every post that
    has a "tim" entry contributes one item holding its original
    "filename", its "ext" and the "tim" value converted to a string
    under the key "id".
    """
    attachments = {}
    for post in thread["posts"]:
        if "tim" not in post:
            # Posts without "tim" are skipped.
            continue
        entry = {}
        entry["filename"] = post["filename"]
        entry["ext"] = post["ext"]
        entry["id"] = str(post["tim"])
        attachments[post["no"]] = entry
    return attachments
28
def downloadImage(board, imagename, filename=None):
    """Download one image of a board to a local file.

    imagename is the name of the file on the image server; filename is
    the local destination and falls back to imagename when it is empty
    or omitted. Returns whatever urllib.urlretrieve returns.
    """
    source = IMAGE_URL % (board, imagename)
    destination = filename or imagename
    return urllib.urlretrieve(source, destination)
33
def downloadThread(board, id, showimages=True, getimages=True):
    """Save a thread as "<id>.html" in the current directory.

    board is the board name (g, for example); id is the thread id (a
    string or a number). When showimages is true, posts that carry an
    attachment get a thumbnail-sized <img> link; getimages additionally
    downloads those attachments next to the HTML file and is only
    consulted when showimages is true.

    Raises IOError if the thread could not be fetched.
    """
    # Local import: the module header does not import codecs.
    import codecs

    thread = getThreadInfo(board, id)
    if thread is None:
        # getThreadInfo reports every failure as None; fail with a clear
        # message instead of an opaque TypeError on the subscript below.
        raise IOError("could not fetch thread %s of board %s" % (id, board))

    # The id is concatenated into the file name and into link prefixes.
    id = str(id)
    posts = thread["posts"]
    first = posts[0]
    # An image-only opening post has neither a subject nor a comment.
    title = first["sub"] if "sub" in first else first.get("com", "")
    images = getImageList(thread)
    sp = " "
    hover = ("onmouseover=\"this.style.backgroundColor = '#FFBBBB';\" "
             "onmouseout=\"this.style.backgroundColor = '#BBBBBB';\"")

    # Comments may contain non-ASCII text, so the page is written as UTF-8
    # explicitly and declares that encoding to the browser.
    with codecs.open(id + ".html", "w", encoding="utf-8") as page:
        page.write("""<html>
 <head>
 <meta charset="utf-8">
 <style>
 .post
 {
 background-color: #BBBBBB;
 }
 .image
 {
 min-height: 100px;
 }
 </style>
 <title>%s</title>
 </head>
 <body>
 <h2>%s</h2>
""" % (title, title))
        for post in posts:
            parts = [sp + "<a name=\"p{id}\">\n".format(id=str(post["no"]))]
            image = images.get(post["no"]) if showimages else None
            if image is not None:
                parts.append(sp + " <div class=\"post image\" " + hover + ">\n")
                imagename = image["id"] + image["ext"]
                if getimages:
                    downloadImage(board, imagename)
                parts.append(sp + " <a href=\"{filename}\"><img src=\"{filename}\" align=\"left\" style=\"height: 100px; width: 125px;\"></a>\n".format(filename=imagename))
            else:
                parts.append(sp + " <div class=\"post\" " + hover + ">\n")
            # Image-only posts have no "com" entry.
            comment = post.get("com", "")
            # Drop the thread id from quote links so they become plain
            # in-page anchors ("#p123").
            parts.append(sp + sp + comment.replace("href=\"" + id, "href=\"") + "\n")
            parts.append(sp + " </div>\n")
            parts.append(sp + "</a><hr>\n")
            page.write("".join(parts))
        page.write("""
 </body>
</html>""")