import sys
import requests
from hurry.filesize import size, si
from bs4 import BeautifulSoup as Soup
# Running total, in bytes, of every content-length seen so far; updated by
# add_to_total().
total = 0

# Root URL of the listing to crawl: the first command-line argument if one
# was given, otherwise the default server.
BASE = sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
# Relative hrefs are appended to BASE by plain string concatenation, so it
# must end with a slash; otherwise "http://host/dir" + "file" would become
# "http://host/dirfile".
if not BASE.endswith("/"):
    BASE += "/"

def add_to_total(uri):
    """Add the size of the resource at ``BASE + uri`` to the global total.

    Issues an HTTP HEAD request and reads the ``content-length`` response
    header. A progress line is printed for every URI, whether it was counted
    or skipped.

    Args:
        uri: Path of the resource relative to BASE.

    Side effects:
        Increments the module-level ``total`` by the reported length. URIs
        whose response has no ``content-length`` header, or a value that is
        not an integer, are skipped and leave ``total`` unchanged.
    """
    global total
    headers = requests.head(BASE + uri).headers
    raw_length = headers.get("content-length")
    if raw_length is None:
        print("{}: No content-length, skipping".format(uri))
        return
    try:
        adding = int(raw_length)
    except ValueError:
        # A malformed header on one file should not abort the whole crawl.
        print("{}: Invalid content-length {!r}, skipping".format(uri, raw_length))
        return
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("{}: Adding {}".format(uri, size(adding, system=si)))
    total += adding

def do_iterate(dir=""):
    """Recursively walk the HTML listing at ``BASE + dir`` and total file sizes.

    Fetches the page, collects its ``<a>`` elements and, for each link, either
    recurses (href ends with "/") or hands the link to add_to_total().

    Args:
        dir: Path of the listing relative to BASE; "" means BASE itself.
            (The name shadows the builtin but is kept because it is part of
            the keyword interface.)
    """
    print("Entering {}{}".format(BASE, dir))
    page = requests.get(BASE + dir).text
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to avoid bs4's parser warning).
    anchors = Soup(page, "html.parser").findAll('a')
    # The first three anchors are skipped -- presumably header/parent links
    # of the server's listing page; confirm against the target server.
    for a in anchors[3:]:
        href = a.get("href")
        if not href:
            # Anchors with no href, or an empty one, point nowhere useful.
            continue
        if href.endswith("/"):
            do_iterate(dir=dir + href)
        else:
            add_to_total(dir + href)

def main():
    """Crawl the listing rooted at BASE and print the accumulated size.

    The total is printed in a ``finally`` clause, so a partial figure is
    still reported when the crawl is cut short.

    Returns:
        0 when the crawl completed, 130 when it was interrupted with Ctrl-C.

    Raises:
        Any other exception raised by the crawl propagates after the total
        has been printed. (A ``return`` inside ``finally`` would silently
        discard it and make the script exit 0 on failure.)
    """
    status = 0
    try:
        do_iterate()
    except KeyboardInterrupt:
        # Conventional exit status for termination by SIGINT (128 + 2).
        status = 130
    finally:
        print("Total size of {}: {} ({})".format(BASE, total, size(total, system=si)))
    return status

if __name__ == "__main__":
    sys.exit(main())