Revision of apache_index_total.py

1 file changed, 1 insertion, 1 deletion

apache_index_total.py

			@@ -24,7 +24,7 @@ def do_iterate(dir="", BASE="", COL=""):
24	24
25	25		def main():
26	26		BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
27		-	COL=sys.argv[2] if len(sys.argv) > 2 else 3
	27	+	COL=int(sys.argv[2]) if len(sys.argv) > 2 else 3
28	28		try:
29	29		do_iterate("", BASE, COL)
30	30		finally:

1 file changed, 8 insertions, 8 deletions

apache_index_total.py

			@@ -3,9 +3,8 @@ import requests
3	3		from hurry.filesize import size, si
4	4		from bs4 import BeautifulSoup as Soup
5	5		total = 0
6		-	BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
7	6
8		-	def add_to_total(uri):
	7	+	def add_to_total(uri, BASE=""):
9	8		global total
10	9		headers = requests.head(BASE + uri).headers
11	10		if "content-length" in headers:
			@@ -15,18 +14,19 @@ def add_to_total(uri):
15	14		else:
16	15		print "{}: No content-length, skipping".format(uri)
17	16
18		-	def do_iterate(dir=""):
19		-	global BASE
	17	+	def do_iterate(dir="", BASE="", COL=""):
20	18		print "Entering {}{}".format(BASE, dir)
21		-	for a in Soup(requests.get(BASE + dir).text).findAll('a')[3:]:
	19	+	for a in Soup(requests.get(BASE + dir).text).findAll('a')[COL:]:
22	20		if a["href"][-1] == "/":
23		-	do_iterate(dir=dir + a["href"])
	21	+	do_iterate(dir + a["href"], BASE, COL)
24	22		else:
25		-	add_to_total(dir + a["href"])
	23	+	add_to_total(dir + a["href"], BASE)
26	24
27	25		def main():
	26	+	BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
	27	+	COL=sys.argv[2] if len(sys.argv) > 2 else 3
28	28		try:
29		-	do_iterate()
	29	+	do_iterate("", BASE, COL)
30	30		finally:
31	31		print "Total size of {}: {} ({})".format(BASE, total, size(total, system=si))
32	32		return 0

1 file changed, 35 insertions

apache_index_total.py(file created)

		@@ -0,0 +1,35 @@
1	+	import sys
2	+	import requests
3	+	from hurry.filesize import size, si
4	+	from bs4 import BeautifulSoup as Soup
# Running byte count for the whole crawl; add_to_total() increments it.
total = 0

# Root URL of the directory index to walk. The first command-line argument
# overrides the built-in default.
if len(sys.argv) > 1:
    BASE = sys.argv[1]
else:
    BASE = "http://file.cite.wa.edu.au/"
7	+
def add_to_total(uri):
    """Add the size of one remote file to the module-level running total.

    Issues a HEAD request for ``BASE + uri``. When the response carries a
    ``content-length`` header, that many bytes are added to the global
    ``total``; otherwise the file is reported and skipped.

    uri -- path of the file, relative to the module-level BASE URL.
    """
    global total
    # requests.head() does not follow redirects unless asked to. Without
    # allow_redirects=True a redirected file would contribute the length of
    # the redirect response instead of the length of the file itself.
    headers = requests.head(BASE + uri, allow_redirects=True).headers
    if "content-length" not in headers:
        print("{}: No content-length, skipping".format(uri))
        return
    adding = int(headers["content-length"])
    # Single-argument print(...) behaves the same under the Python 2 print
    # statement used elsewhere in this script.
    print("{}: Adding {}".format(uri, size(adding, system=si)))
    total += adding
17	+
def do_iterate(dir="", skip=3):
    """Recursively walk a directory index page and total the files it lists.

    Fetches ``BASE + dir`` and looks at every ``<a>`` element after the first
    ``skip`` ones. Links whose href ends in "/" are treated as
    sub-directories and walked recursively; every other link is handed to
    add_to_total().

    dir  -- path of the directory to list, relative to the module-level BASE.
    skip -- number of leading anchors to ignore. Defaults to 3, the value
            that used to be hard-coded (presumably the index page's header
            links -- confirm against the server being crawled).
    """
    print("Entering {}{}".format(BASE, dir))
    page = requests.get(BASE + dir).text
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed alongside BeautifulSoup.
    anchors = Soup(page, "html.parser").findAll("a")[skip:]
    for a in anchors:
        href = a.get("href")
        if not href:
            # An anchor with no href, or an empty one, has nothing to fetch.
            # Indexing it directly used to raise KeyError / IndexError and
            # abort the whole crawl.
            continue
        path = dir + href
        if href.endswith("/"):
            do_iterate(path, skip)
        else:
            add_to_total(path)
26	+
def main():
    """Walk the index rooted at BASE and report the accumulated size.

    The summary line is printed from a ``finally`` clause, so it is emitted
    even when the walk is interrupted by an exception; the exception then
    continues to propagate. Returns 0 when the walk completes.
    """
    try:
        do_iterate()
    finally:
        readable = size(total, system=si)
        summary = "Total size of {}: {} ({})".format(BASE, total, readable)
        print(summary)
    return 0
33	+
# Run the crawl only when executed as a script; the value returned by main()
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

Newer Older