Steven Smith revised this gist. Go to revision
1 file changed, 1 insertion, 1 deletion
apache_index_total.py
| @@ -24,7 +24,7 @@ def do_iterate(dir="", BASE="", COL=""): | |||
| 24 | 24 | ||
| 25 | 25 | def main(): | |
| 26 | 26 | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/" | |
| 27 | - | COL=sys.argv[2] if len(sys.argv) > 2 else 3 | |
| 27 | + | COL=int(sys.argv[2]) if len(sys.argv) > 2 else 3 | |
| 28 | 28 | try: | |
| 29 | 29 | do_iterate("", BASE, COL) | |
| 30 | 30 | finally: | |
Steven Smith revised this gist. Go to revision
1 file changed, 8 insertions, 8 deletions
apache_index_total.py
| @@ -3,9 +3,8 @@ import requests | |||
| 3 | 3 | from hurry.filesize import size, si | |
| 4 | 4 | from bs4 import BeautifulSoup as Soup | |
| 5 | 5 | total = 0 | |
| 6 | - | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/" | |
| 7 | 6 | ||
| 8 | - | def add_to_total(uri): | |
| 7 | + | def add_to_total(uri, BASE=""): | |
| 9 | 8 | global total | |
| 10 | 9 | headers = requests.head(BASE + uri).headers | |
| 11 | 10 | if "content-length" in headers: | |
| @@ -15,18 +14,19 @@ def add_to_total(uri): | |||
| 15 | 14 | else: | |
| 16 | 15 | print "{}: No content-length, skipping".format(uri) | |
| 17 | 16 | ||
| 18 | - | def do_iterate(dir=""): | |
| 19 | - | global BASE | |
| 17 | + | def do_iterate(dir="", BASE="", COL=""): | |
| 20 | 18 | print "Entering {}{}".format(BASE, dir) | |
| 21 | - | for a in Soup(requests.get(BASE + dir).text).findAll('a')[3:]: | |
| 19 | + | for a in Soup(requests.get(BASE + dir).text).findAll('a')[COL:]: | |
| 22 | 20 | if a["href"][-1] == "/": | |
| 23 | - | do_iterate(dir=dir + a["href"]) | |
| 21 | + | do_iterate(dir + a["href"], BASE, COL) | |
| 24 | 22 | else: | |
| 25 | - | add_to_total(dir + a["href"]) | |
| 23 | + | add_to_total(dir + a["href"], BASE) | |
| 26 | 24 | ||
| 27 | 25 | def main(): | |
| 26 | + | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/" | |
| 27 | + | COL=sys.argv[2] if len(sys.argv) > 2 else 3 | |
| 28 | 28 | try: | |
| 29 | - | do_iterate() | |
| 29 | + | do_iterate("", BASE, COL) | |
| 30 | 30 | finally: | |
| 31 | 31 | print "Total size of {}: {} ({})".format(BASE, total, size(total, system=si)) | |
| 32 | 32 | return 0 | |
Steven Smith revised this gist. Go to revision
1 file changed, 35 insertions
apache_index_total.py (file created)
| @@ -0,0 +1,35 @@ | |||
| 1 | + | import sys | |
| 2 | + | import requests | |
| 3 | + | from hurry.filesize import size, si | |
| 4 | + | from bs4 import BeautifulSoup as Soup | |
| 5 | + | total = 0 | |
| 6 | + | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/" | |
| 7 | + | ||
| 8 | + | def add_to_total(uri): | |
| 9 | + | global total | |
| 10 | + | headers = requests.head(BASE + uri).headers | |
| 11 | + | if "content-length" in headers: | |
| 12 | + | adding = int(headers["content-length"]) | |
| 13 | + | print "{}: Adding {}".format(uri, size(adding, system=si)) | |
| 14 | + | total += adding | |
| 15 | + | else: | |
| 16 | + | print "{}: No content-length, skipping".format(uri) | |
| 17 | + | ||
| 18 | + | def do_iterate(dir=""): | |
| 19 | + | global BASE | |
| 20 | + | print "Entering {}{}".format(BASE, dir) | |
| 21 | + | for a in Soup(requests.get(BASE + dir).text).findAll('a')[3:]: | |
| 22 | + | if a["href"][-1] == "/": | |
| 23 | + | do_iterate(dir=dir + a["href"]) | |
| 24 | + | else: | |
| 25 | + | add_to_total(dir + a["href"]) | |
| 26 | + | ||
| 27 | + | def main(): | |
| 28 | + | try: | |
| 29 | + | do_iterate() | |
| 30 | + | finally: | |
| 31 | + | print "Total size of {}: {} ({})".format(BASE, total, size(total, system=si)) | |
| 32 | + | return 0 | |
| 33 | + | ||
| 34 | + | if __name__ == "__main__": | |
| 35 | + | sys.exit(main()) | |