4chanthreadgrabber.py
· 2.5 KiB · Python
Raw
# 4chan thread grabber
import urllib
import urllib2
import json
# URL templates filled with the % operator:
#   API_URL   % (board, thread id)  -> JSON description of one thread
#   IMAGE_URL % (board, image name) -> one uploaded image (name is tim + ext)
# Hosts and paths follow the published 4chan API, which serves JSON from
# a.4cdn.org and user images from i.4cdn.org.
API_URL = "https://a.4cdn.org/%s/thread/%s.json"
IMAGE_URL = "https://i.4cdn.org/%s/%s"
def getThreadInfo(board, id):
    """Fetch one thread from the JSON API and return it decoded.

    board is the board name (g, for example); id is the thread id.

    Returns the object json.loads builds from the response body, or None
    when the request or the decoding fails.
    """
    try:
        response = urllib2.urlopen(API_URL % (board, id))
        try:
            return json.loads(response.read())
        finally:
            # Release the connection even if reading or decoding fails.
            response.close()
    except Exception:
        # Best-effort lookup: any failure is reported as None.  Catching
        # Exception instead of using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        return None
def getImageList(thread):
    """Map post numbers to the attachment details of posts that have one.

    thread is the thread json produced by getThreadInfo.  A post is treated
    as having an attachment when it carries a "tim" key.

    Returns a dict keyed by each such post's "no" value.  Every value is a
    dict with "filename" and "ext" copied from the post and "id" holding
    str() of the post's "tim".  Returns an empty dict when thread is None or
    empty, or has no "posts" entry (getThreadInfo returns None on failure).
    """
    if not thread:
        return {}

    images = {}
    for post in thread.get("posts", ()):
        if "tim" not in post:
            continue
        images[post["no"]] = {
            "filename": post["filename"],
            "ext": post["ext"],
            "id": str(post["tim"]),
        }
    return images
def downloadImage(board, imagename, filename=None):
    """Download one image of a board to a local file.

    board is the board name; imagename is the image's name on the server
    (it fills the second slot of IMAGE_URL); filename is the local path to
    write to and falls back to imagename when it is empty or omitted.

    Returns whatever urllib.urlretrieve returns.
    """
    target = filename or imagename
    source = IMAGE_URL % (board, imagename)
    return urllib.urlretrieve(source, target)
def downloadThread(board, id, showimages=True, getimages=True):
    """Save a thread as the HTML file "<id>.html" (a relative path).

    The thread is fetched with getThreadInfo.  The page title is the first
    post's "sub" if present, otherwise its "com" (empty if it has neither).
    Every post becomes an anchored <div> holding the post's comment HTML.

    board      -- board name (g, for example)
    id         -- thread id; passed through str(), so an int is accepted
    showimages -- when true, a post with an attachment also gets a
                  thumbnail-sized <img> link to "<tim><ext>"
    getimages  -- when true (and showimages is true), each attachment is
                  fetched with downloadImage before its link is written

    Raises IOError when getThreadInfo yields no thread or no posts.  Errors
    raised by downloadImage are not caught.
    """
    # Local import keeps the function self-contained.  io.open takes an
    # explicit encoding, so non-ASCII comments do not fail on write.
    import io

    thread = getThreadInfo(board, id)
    if not thread or not thread.get("posts"):
        raise IOError("could not fetch thread %s from board %s" % (id, board))

    thread_id = str(id)
    posts = thread["posts"]
    first = posts[0]
    # A post without text has no "com" key, so fall back to an empty title.
    title = first["sub"] if "sub" in first else first.get("com", u"")
    images = getImageList(thread)
    sp = u" "
    # Both kinds of post share this opening tag; only the class list differs.
    div_open = (u" <div class=\"{css}\" onmouseover=\"this.style.backgroundColor = '#FFBBBB';\""
                u" onmouseout=\"this.style.backgroundColor = '#BBBBBB';\">\n")
    # Prefix that is cut out of link targets in the comment HTML below.
    own_link = u"href=\"" + thread_id

    with io.open(thread_id + ".html", "w", encoding="utf-8") as page:
        page.write(u"""<html>
<head>
<meta charset="utf-8">
<style>
.post
{
background-color: #BBBBBB;
}
.image
{
min-height: 100px;
}
</style>
<title>%s</title>
</head>
<body>
<h2>%s</h2>
""" % (title, title))
        for post in posts:
            number = post["no"]
            parts = [sp, u"<a name=\"p{id}\">\n".format(id=str(number))]
            if showimages and number in images:
                image = images[number]
                image_name = image["id"] + image["ext"]
                parts += [sp, div_open.format(css=u"post image")]
                if getimages:
                    downloadImage(board, image_name)
                # NOTE(review): the link is written even when getimages is
                # false, so it then points at a file this call did not
                # download -- confirm that is intended.
                parts += [sp, u" <a href=\"{filename}\"><img src=\"{filename}\" align=\"left\" style=\"height: 100px; width: 125px;\"></a>\n".format(filename=image_name)]
            else:
                parts += [sp, div_open.format(css=u"post")]
            # Posts without text have no "com" key.  Dropping the thread id
            # from link targets turns href="<id>#p123" into href="#p123",
            # which matches the p<no> anchors written above.
            comment = post.get("com", u"").replace(own_link, u"href=\"")
            parts += [sp, sp, comment, u"\n"]
            parts += [sp, u" </div>\n"]
            parts += [sp, u"</a><hr>\n"]
            page.write(u"".join(parts))
        page.write(u"""
</body>
</html>""")
| 1 | # 4chan thread grabber |
| 2 | |
| 3 | import urllib |
| 4 | import urllib2 |
| 5 | import json |
| 6 | |
# URL templates filled with the % operator:
#   API_URL   % (board, thread id)  -> JSON description of one thread
#   IMAGE_URL % (board, image name) -> one uploaded image (name is tim + ext)
# Hosts and paths follow the published 4chan API, which serves JSON from
# a.4cdn.org and user images from i.4cdn.org.
API_URL = "https://a.4cdn.org/%s/thread/%s.json"
IMAGE_URL = "https://i.4cdn.org/%s/%s"
| 9 | |
def getThreadInfo(board, id):
    """Fetch one thread from the JSON API and return it decoded.

    board is the board name (g, for example); id is the thread id.

    Returns the object json.loads builds from the response body, or None
    when the request or the decoding fails.
    """
    try:
        response = urllib2.urlopen(API_URL % (board, id))
        try:
            return json.loads(response.read())
        finally:
            # Release the connection even if reading or decoding fails.
            response.close()
    except Exception:
        # Best-effort lookup: any failure is reported as None.  Catching
        # Exception instead of using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        return None
| 17 | |
def getImageList(thread):
    """Map post numbers to the attachment details of posts that have one.

    thread is the thread json produced by getThreadInfo.  A post is treated
    as having an attachment when it carries a "tim" key.

    Returns a dict keyed by each such post's "no" value.  Every value is a
    dict with "filename" and "ext" copied from the post and "id" holding
    str() of the post's "tim".  Returns an empty dict when thread is None or
    empty, or has no "posts" entry (getThreadInfo returns None on failure).
    """
    if not thread:
        return {}

    images = {}
    for post in thread.get("posts", ()):
        if "tim" not in post:
            continue
        images[post["no"]] = {
            "filename": post["filename"],
            "ext": post["ext"],
            "id": str(post["tim"]),
        }
    return images
| 28 | |
def downloadImage(board, imagename, filename=None):
    """Download one image of a board to a local file.

    board is the board name; imagename is the image's name on the server
    (it fills the second slot of IMAGE_URL); filename is the local path to
    write to and falls back to imagename when it is empty or omitted.

    Returns whatever urllib.urlretrieve returns.
    """
    target = filename or imagename
    source = IMAGE_URL % (board, imagename)
    return urllib.urlretrieve(source, target)
| 33 | |
def downloadThread(board, id, showimages=True, getimages=True):
    """Save a thread as the HTML file "<id>.html" (a relative path).

    The thread is fetched with getThreadInfo.  The page title is the first
    post's "sub" if present, otherwise its "com" (empty if it has neither).
    Every post becomes an anchored <div> holding the post's comment HTML.

    board      -- board name (g, for example)
    id         -- thread id; passed through str(), so an int is accepted
    showimages -- when true, a post with an attachment also gets a
                  thumbnail-sized <img> link to "<tim><ext>"
    getimages  -- when true (and showimages is true), each attachment is
                  fetched with downloadImage before its link is written

    Raises IOError when getThreadInfo yields no thread or no posts.  Errors
    raised by downloadImage are not caught.
    """
    # Local import keeps the function self-contained.  io.open takes an
    # explicit encoding, so non-ASCII comments do not fail on write.
    import io

    thread = getThreadInfo(board, id)
    if not thread or not thread.get("posts"):
        raise IOError("could not fetch thread %s from board %s" % (id, board))

    thread_id = str(id)
    posts = thread["posts"]
    first = posts[0]
    # A post without text has no "com" key, so fall back to an empty title.
    title = first["sub"] if "sub" in first else first.get("com", u"")
    images = getImageList(thread)
    sp = u" "
    # Both kinds of post share this opening tag; only the class list differs.
    div_open = (u" <div class=\"{css}\" onmouseover=\"this.style.backgroundColor = '#FFBBBB';\""
                u" onmouseout=\"this.style.backgroundColor = '#BBBBBB';\">\n")
    # Prefix that is cut out of link targets in the comment HTML below.
    own_link = u"href=\"" + thread_id

    with io.open(thread_id + ".html", "w", encoding="utf-8") as page:
        page.write(u"""<html>
<head>
<meta charset="utf-8">
<style>
.post
{
background-color: #BBBBBB;
}
.image
{
min-height: 100px;
}
</style>
<title>%s</title>
</head>
<body>
<h2>%s</h2>
""" % (title, title))
        for post in posts:
            number = post["no"]
            parts = [sp, u"<a name=\"p{id}\">\n".format(id=str(number))]
            if showimages and number in images:
                image = images[number]
                image_name = image["id"] + image["ext"]
                parts += [sp, div_open.format(css=u"post image")]
                if getimages:
                    downloadImage(board, image_name)
                # NOTE(review): the link is written even when getimages is
                # false, so it then points at a file this call did not
                # download -- confirm that is intended.
                parts += [sp, u" <a href=\"{filename}\"><img src=\"{filename}\" align=\"left\" style=\"height: 100px; width: 125px;\"></a>\n".format(filename=image_name)]
            else:
                parts += [sp, div_open.format(css=u"post")]
            # Posts without text have no "com" key.  Dropping the thread id
            # from link targets turns href="<id>#p123" into href="#p123",
            # which matches the p<no> anchors written above.
            comment = post.get("com", u"").replace(own_link, u"href=\"")
            parts += [sp, sp, comment, u"\n"]
            parts += [sp, u" </div>\n"]
            parts += [sp, u"</a><hr>\n"]
            page.write(u"".join(parts))
        page.write(u"""
</body>
</html>""")