Last active 1440821683

Revision 06ed86508cd2c928b0a7f176fc77fa7c6e4b4e4f

apache_index_total.py Raw
1import sys
2import requests
3from hurry.filesize import size, si
4from bs4 import BeautifulSoup as Soup
# Running byte total of every file counted so far; updated by add_to_total().
total = 0

# Root URL of the Apache index to crawl: the first command-line argument when
# one is given, otherwise the default server below.
BASE = sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
# Child paths are appended to BASE by plain string concatenation, so make sure
# it ends with a slash ("http://host/pub" + "x" would otherwise give ".../pubx").
if not BASE.endswith("/"):
    BASE += "/"
7
def add_to_total(uri):
    """Add the size of the file at ``BASE + uri`` to the global ``total``.

    Issues a HEAD request and looks at the ``content-length`` response
    header. When the header is present its integer value is added to
    ``total`` and a progress line is printed; otherwise the file is skipped
    and a notice is printed instead.

    Args:
        uri: Path of the file relative to BASE.
    """
    global total
    response_headers = requests.head(BASE + uri).headers

    # Guard clause: nothing to count when the server does not report a size.
    if "content-length" not in response_headers:
        print("{}: No content-length, skipping".format(uri))
        return

    file_bytes = int(response_headers["content-length"])
    print("{}: Adding {}".format(uri, size(file_bytes, system=si)))
    total += file_bytes
17
def do_iterate(dir=""):
    """Recursively walk the index page at ``BASE + dir`` and total its files.

    Every link whose href ends in "/" is treated as a subdirectory and
    recursed into; every other link is handed to add_to_total().

    Args:
        dir: Path of the directory relative to BASE ("" for the root).
            It is concatenated directly with each href, so it should be
            empty or end with "/".
    """
    print("Entering {}{}".format(BASE, dir))
    listing = Soup(requests.get(BASE + dir).text)

    # The first three anchors are skipped, as in the original; presumably
    # they are the listing's header links on the target server -- confirm.
    for anchor in listing.findAll("a")[3:]:
        href = anchor.get("href")
        if not href:
            # Anchor without (or with an empty) href: nothing to follow.
            continue
        if href.startswith(("?", "/", "../")) or "://" in href:
            # Column-sort queries, parent/root-relative paths and absolute
            # URLs are not children of this directory; following them would
            # build bogus URLs or recurse back up the tree without end.
            continue

        child = dir + href
        if href.endswith("/"):
            do_iterate(dir=child)
        else:
            add_to_total(child)
26
def main():
    """Crawl the index rooted at BASE and report the accumulated size.

    The summary line is printed from a ``finally`` clause, so it is also
    emitted when the crawl is cut short by an exception.

    Returns:
        0, which the caller uses as the process exit status.
    """
    # NOTE(review): the original indentation was lost; ``return 0`` is
    # assumed to follow the try/finally rather than sit inside the
    # ``finally`` clause -- confirm against the original file.
    try:
        do_iterate()
    finally:
        summary = "Total size of {}: {} ({})".format(
            BASE, total, size(total, system=si)
        )
        print(summary)
    return 0
33
# Run the crawl only when executed as a script; the value returned by main()
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())