Last active 1450763335

Steven Smith revised this gist 1378726426. Go to revision

1 file changed, 74 insertions

4chanthreadgrabber.py(file created)

@@ -0,0 +1,74 @@
1 + # 4chan thread grabber
2 +
3 + import urllib
4 + import urllib2
5 + import json
6 +
# URL templates for the read-only 4chan JSON API and its image host.
# Both are filled in with %-formatting and take exactly two values.
# These are the endpoints given in the public API documentation; the
# api.4chan.org/.../res/ and images.4chan.org/.../src/ forms are not.
API_URL = "https://a.4cdn.org/%s/thread/%s.json"  # (board, thread id)
IMAGE_URL = "https://i.4cdn.org/%s/%s"  # (board, image id + extension)
9 +
def getThreadInfo(board, id):
    """Fetch one thread from the JSON API and return it decoded.

    board is the board name (g, for example); id is the thread id.

    Returns the decoded JSON document, or None when the request or the
    JSON decoding fails.
    """
    try:
        response = urllib2.urlopen(API_URL % (board, id))
        try:
            return json.loads(response.read())
        finally:
            # Release the connection even when reading or decoding fails.
            response.close()
    except Exception:
        # Best effort: any failure is reported to the caller as None.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
17 +
def getImageList(thread):
    """Collect the attachments of a thread, keyed by post number.

    thread is the thread json produced by getThreadInfo.

    Returns a dict mapping each post's "no" to a dict with the keys
    "filename" and "ext" (copied from the post) and "id" (the post's
    "tim" value converted to a string). Posts without a "tim" entry are
    skipped.
    """
    out = {}
    for post in thread["posts"]:
        # Only posts carrying a "tim" entry are treated as having a file.
        if "tim" not in post:
            continue
        out[post["no"]] = {
            "filename": post["filename"],
            "ext": post["ext"],
            "id": str(post["tim"]),
        }
    return out
28 +
def downloadImage(board, imagename, filename=None):
    """Download one image of a board to a local file.

    board is the board name; imagename is the remote file name that is
    substituted into IMAGE_URL; filename is the local path to write to.
    When filename is omitted (or empty) the image is saved under
    imagename.

    Returns whatever urllib.urlretrieve returns.
    """
    target = filename or imagename
    return urllib.urlretrieve(IMAGE_URL % (board, imagename), target)
33 +
def downloadThread(board, id, showimages=True, getimages=True):
    """Save a thread as "<id>.html" in the current directory.

    board      -- the board name (g, for example)
    id         -- the thread id, as a string or an int
    showimages -- when true, posts with an attachment get an image link
    getimages  -- when true (and showimages is true), every attachment is
                  also fetched with downloadImage

    The page is written UTF-8 encoded.

    Raises IOError when getThreadInfo could not load the thread or the
    thread contains no posts.
    """
    # The id is concatenated into the file name and into link prefixes
    # below, so an int id has to be turned into text first.
    thread_id = str(id)
    thread = getThreadInfo(board, thread_id)
    if not thread or not thread.get("posts"):
        # getThreadInfo reports every failure as None; fail with a clear
        # message here instead of a TypeError on the subscript below.
        raise IOError("could not load thread %s from board %s"
                      % (thread_id, board))
    posts = thread["posts"]
    first = posts[0]
    # The opening post may have a subject, only a comment, or neither.
    title = first.get("sub") or first.get("com") or thread_id
    images = getImageList(thread)
    sp = " "
    hover = ("onmouseover=\"this.style.backgroundColor = '#FFBBBB';\" "
             "onmouseout=\"this.style.backgroundColor = '#BBBBBB';\"")
    # Written as encoded bytes so that non-ASCII post text cannot raise an
    # encoding error on write, whatever the platform default encoding is.
    with open(thread_id + ".html", "wb") as page:
        header = """<html>
<head>
<meta charset="utf-8">
<style>
.post
{
background-color: #BBBBBB;
}
.image
{
min-height: 100px;
}
</style>
<title>%s</title>
</head>
<body>
<h2>%s</h2>
""" % (title, title)
        page.write(header.encode("utf-8"))
        for post in posts:
            number = post["no"]
            out = sp + "<a name=\"p{id}\">\n".format(id=str(number))
            image = images.get(number) if showimages else None
            if image is not None:
                out += sp + " <div class=\"post image\" " + hover + ">\n"
                imagename = image["id"] + image["ext"]
                if getimages:
                    downloadImage(board, imagename)
                out += sp + " <a href=\"{filename}\"><img src=\"{filename}\" align=\"left\" style=\"height: 100px; width: 125px;\"></a>\n".format(filename=imagename)
            else:
                out += sp + " <div class=\"post\" " + hover + ">\n"
            # Posts that consist of an image only have no "com" entry.
            comment = post.get("com", "")
            # Drop the thread id from link targets so that links of the form
            # href="<id>#p..." point at the anchors inside this page.
            out += sp + sp + comment.replace("href=\"" + thread_id, "href=\"") + "\n"
            out += sp + " </div>\n"
            out += sp + "</a><hr>\n"
            page.write(out.encode("utf-8"))
        page.write("""
</body>
</html>""".encode("utf-8"))
Newer Older