Last active 1440821683

Steven Smith revised this gist 1423029334. Go to revision

1 file changed, 1 insertion, 1 deletion

apache_index_total.py

@@ -24,7 +24,7 @@ def do_iterate(dir="", BASE="", COL=""):
24 24
25 25 def main():
26 26 BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
27 - COL=sys.argv[2] if len(sys.argv) > 2 else 3
27 + COL=int(sys.argv[2]) if len(sys.argv) > 2 else 3
28 28 try:
29 29 do_iterate("", BASE, COL)
30 30 finally:

Steven Smith revised this gist 1423029278. Go to revision

1 file changed, 8 insertions, 8 deletions

apache_index_total.py

@@ -3,9 +3,8 @@ import requests
3 3 from hurry.filesize import size, si
4 4 from bs4 import BeautifulSoup as Soup
5 5 total = 0
6 - BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
7 6
8 - def add_to_total(uri):
7 + def add_to_total(uri, BASE=""):
9 8 global total
10 9 headers = requests.head(BASE + uri).headers
11 10 if "content-length" in headers:
@@ -15,18 +14,19 @@ def add_to_total(uri):
15 14 else:
16 15 print "{}: No content-length, skipping".format(uri)
17 16
18 - def do_iterate(dir=""):
19 - global BASE
17 + def do_iterate(dir="", BASE="", COL=""):
20 18 print "Entering {}{}".format(BASE, dir)
21 - for a in Soup(requests.get(BASE + dir).text).findAll('a')[3:]:
19 + for a in Soup(requests.get(BASE + dir).text).findAll('a')[COL:]:
22 20 if a["href"][-1] == "/":
23 - do_iterate(dir=dir + a["href"])
21 + do_iterate(dir + a["href"], BASE, COL)
24 22 else:
25 - add_to_total(dir + a["href"])
23 + add_to_total(dir + a["href"], BASE)
26 24
27 25 def main():
26 + BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
27 + COL=sys.argv[2] if len(sys.argv) > 2 else 3
28 28 try:
29 - do_iterate()
29 + do_iterate("", BASE, COL)
30 30 finally:
31 31 print "Total size of {}: {} ({})".format(BASE, total, size(total, system=si))
32 32 return 0

Steven Smith revised this gist 1423028900. Go to revision

1 file changed, 35 insertions

apache_index_total.py (file created)

@@ -0,0 +1,35 @@
1 + import sys
2 + import requests
3 + from hurry.filesize import size, si
4 + from bs4 import BeautifulSoup as Soup
5 + total = 0
6 + BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
7 +
8 + def add_to_total(uri):
9 + global total
10 + headers = requests.head(BASE + uri).headers
11 + if "content-length" in headers:
12 + adding = int(headers["content-length"])
13 + print "{}: Adding {}".format(uri, size(adding, system=si))
14 + total += adding
15 + else:
16 + print "{}: No content-length, skipping".format(uri)
17 +
18 + def do_iterate(dir=""):
19 + global BASE
20 + print "Entering {}{}".format(BASE, dir)
21 + for a in Soup(requests.get(BASE + dir).text).findAll('a')[3:]:
22 + if a["href"][-1] == "/":
23 + do_iterate(dir=dir + a["href"])
24 + else:
25 + add_to_total(dir + a["href"])
26 +
27 + def main():
28 + try:
29 + do_iterate()
30 + finally:
31 + print "Total size of {}: {} ({})".format(BASE, total, size(total, system=si))
32 + return 0
33 +
34 + if __name__ == "__main__":
35 + sys.exit(main())
Newer Older