"""Sum the sizes of all files reachable from an HTTP directory listing.

Usage: pass the base URL as the first command-line argument; when it is
omitted the built-in default below is used.

Starting at the base URL, every listing page is fetched and its links are
followed: links ending in "/" are treated as sub-directories and walked
recursively, every other link gets a HEAD request and its Content-Length
is added to a running total that is printed when the walk ends.
"""
import sys

import requests
from bs4 import BeautifulSoup as Soup
from hurry.filesize import size, si

# Running total, in bytes, of every Content-Length reported so far.
total = 0

# Root URL that all relative paths are appended to.
BASE = sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"

# Seconds to wait on a single HTTP request; without a timeout a stalled
# server would hang the whole walk indefinitely.
REQUEST_TIMEOUT = 30

# Number of leading <a> tags ignored on every listing page.
# NOTE(review): presumably these are the header / parent-directory links of
# the server's auto-generated index -- confirm against the target server.
SKIPPED_LINKS = 3


def add_to_total(uri):
    """Add the size of the file at ``BASE + uri`` to the global total.

    Issues a HEAD request and reads the ``content-length`` response header.
    Files whose response carries no such header are reported and skipped.

    Args:
        uri: Path of the file relative to ``BASE``.
    """
    global total
    headers = requests.head(BASE + uri, timeout=REQUEST_TIMEOUT).headers
    if "content-length" in headers:
        adding = int(headers["content-length"])
        print("{}: Adding {}".format(uri, size(adding, system=si)))
        total += adding
    else:
        print("{}: No content-length, skipping".format(uri))


def do_iterate(dir=""):
    """Walk the listing at ``BASE + dir`` and account for every file below it.

    Args:
        dir: Directory path relative to ``BASE``; the empty string means the
            root listing.
    """
    print("Entering {}{}".format(BASE, dir))
    page = requests.get(BASE + dir, timeout=REQUEST_TIMEOUT).text
    # An explicit parser keeps the result independent of which optional
    # parsers happen to be installed alongside BeautifulSoup.
    for a in Soup(page, "html.parser").find_all("a")[SKIPPED_LINKS:]:
        href = a.get("href")
        if not href:
            # Anchors without a (non-empty) href have nothing to follow.
            continue
        if href.endswith("/"):
            do_iterate(dir=dir + href)
        else:
            add_to_total(dir + href)


def main():
    """Run the walk and print the grand total, even if the walk fails.

    Returns:
        0 when the walk completes; errors raised during the walk propagate
        after the total collected so far has been printed.
    """
    try:
        do_iterate()
    finally:
        print(
            "Total size of {}: {} ({})".format(
                BASE, total, size(total, system=si)
            )
        )
    # Kept outside the ``finally`` so a failure is not silently turned into
    # a successful exit status.
    return 0


if __name__ == "__main__":
    sys.exit(main())