import sys
import requests
from hurry.filesize import size, si
from bs4 import BeautifulSoup as Soup
# Running sum of the Content-Length values (bytes) collected by
# add_to_total(); reported by main() when the crawl finishes.
total = 0

def add_to_total(uri, BASE=""):
    """Add the size the server reports for ``BASE + uri`` to the global total.

    Sends an HTTP HEAD request and reads the ``Content-Length`` response
    header. When the header is missing or is not an integer, a message is
    printed and the total is left unchanged.

    Args:
        uri: Path of the resource; appended verbatim to ``BASE``.
        BASE: URL prefix that ``uri`` is relative to.
    """
    global total
    headers = requests.head(BASE + uri).headers
    raw_length = headers.get("content-length")
    if raw_length is None:
        print("{}: No content-length, skipping".format(uri))
        return
    try:
        adding = int(raw_length)
    except ValueError:
        # A malformed header on one file should not abort the whole crawl.
        print("{}: Invalid content-length {!r}, skipping".format(uri, raw_length))
        return
    print("{}: Adding {}".format(uri, size(adding, system=si)))
    total += adding

def do_iterate(dir="", BASE="", COL=""):
    """Recursively walk the link listing at ``BASE + dir`` and total file sizes.

    The page is fetched and parsed, its first ``COL`` anchors are skipped,
    and every remaining anchor is followed: an href ending in ``/`` is
    recursed into as a sub-directory, anything else is passed to
    ``add_to_total``.

    Args:
        dir: Path of the listing to read, relative to ``BASE``.
        BASE: URL prefix every path is appended to.
        COL: Number of leading anchors to ignore on each page. A falsy
            value (including the ``""`` default) means "skip none".
            Presumably used to skip header / parent-directory links of
            the index page -- confirm against the target server's layout.
    """
    print("Entering {}{}".format(BASE, dir))
    # The declared default "" is not a valid slice index; treat it as 0.
    skip = COL or 0
    listing = Soup(requests.get(BASE + dir).text)
    for a in listing.findAll('a')[skip:]:
        href = a.get("href")
        if not href:
            # Anchors without an href (or with an empty one) cannot be
            # followed; previously these raised KeyError / IndexError.
            continue
        if href.endswith("/"):
            do_iterate(dir + href, BASE, COL)
        else:
            add_to_total(dir + href, BASE)
def main():
    """Crawl the listing named on the command line and print its total size.

    Command-line arguments:
        argv[1]: Base URL to crawl (defaults to http://file.cite.wa.edu.au/).
        argv[2]: Number of leading anchors to skip on each page (default 3).

    Returns:
        0 when the crawl completes, 130 when it is interrupted with Ctrl-C.
        Any other exception propagates to the caller after the total
        gathered so far has been printed.
    """
    BASE = sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
    COL = int(sys.argv[2]) if len(sys.argv) > 2 else 3
    try:
        do_iterate("", BASE, COL)
    except KeyboardInterrupt:
        # Still report the partial total (via finally), but signal the
        # interruption: 128 + SIGINT is the conventional shell status.
        return 130
    finally:
        # Always report what was gathered. No `return` here: returning from
        # a finally block would silently discard an in-flight exception and
        # make a failed crawl exit with status 0.
        print("Total size of {}: {} ({})".format(BASE, total, size(total, system=si)))
    return 0

# Run main() only when executed as a script, and use its return value as
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())