apache_index_total.py
· 1010 B · Python
Raw
import sys
import requests
from hurry.filesize import size, si
from bs4 import BeautifulSoup as Soup
# Running sum, in bytes, of every Content-Length seen so far; updated by
# add_to_total() and reported by main().
total = 0

# Root URL of the directory index to crawl: first command-line argument, or
# the default site when none is given.
BASE = sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"

# Every request URL is built by plain concatenation (BASE + relative path), so
# BASE must end with "/" or "http://host/pub" + "file" becomes ".../pubfile".
if not BASE.endswith("/"):
    BASE += "/"
def add_to_total(uri):
    """Add the size of the file at ``BASE + uri`` to the global ``total``.

    Sends an HTTP HEAD request and reads the size from the ``Content-Length``
    response header. Prints one line per entry saying what was added or why
    the entry was skipped.

    Args:
        uri: Path of the file relative to ``BASE``.

    The entry is skipped, leaving ``total`` unchanged, when the server does
    not answer 200, sends no ``Content-Length`` header, or sends a value that
    is not an integer.
    """
    global total
    response = requests.head(BASE + uri)

    # For error and redirect replies, Content-Length describes the body of
    # that reply, not the file, so counting it would inflate the total.
    if response.status_code != 200:
        print("{}: HTTP {}, skipping".format(uri, response.status_code))
        return

    headers = response.headers
    if "content-length" not in headers:
        print("{}: No content-length, skipping".format(uri))
        return

    try:
        adding = int(headers["content-length"])
    except ValueError:
        # A malformed header should not abort the whole crawl.
        print("{}: Invalid content-length {!r}, skipping".format(
            uri, headers["content-length"]))
        return

    print("{}: Adding {}".format(uri, size(adding, system=si)))
    total += adding
def do_iterate(dir=""):
    """Recursively walk the index page at ``BASE + dir``.

    Fetches the listing, then for each link either recurses (href ends with
    "/", i.e. a sub-directory) or hands the entry to ``add_to_total``.

    Args:
        dir: Directory path relative to ``BASE``; "" means the root listing.
            Expected to be empty or to end with "/", because child paths are
            formed by plain concatenation.
    """
    print("Entering {}{}".format(BASE, dir))
    listing = Soup(requests.get(BASE + dir).text)

    # The first three anchors are skipped, as in the original script --
    # presumably they are header links of the target server's index page
    # (TODO confirm against the server being crawled).
    for a in listing.findAll('a')[3:]:
        href = a.get("href")

        # Anchors without an href, or with an empty one, are not entries.
        if not href:
            continue
        # Query-only links (column sorting) and absolute or parent paths do
        # not name a child of this listing; following a parent link would
        # recurse without end.
        if href.startswith(("?", "/", "../")):
            continue

        if href.endswith("/"):
            do_iterate(dir=dir + href)
        else:
            add_to_total(dir + href)
def main():
    """Crawl the whole tree under ``BASE`` and print the accumulated size.

    The summary is printed from a ``finally`` clause, so a partial total is
    still shown when the crawl is cut short by an exception; the exception
    then propagates. Returns 0 on normal completion, for use as the exit code.
    """
    try:
        do_iterate()
    finally:
        human_readable = size(total, system=si)
        summary = "Total size of {}: {} ({})".format(BASE, total, human_readable)
        print(summary)
    return 0
# Script entry point: run the crawl and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
| 1 | import sys |
| 2 | import requests |
| 3 | from hurry.filesize import size, si |
| 4 | from bs4 import BeautifulSoup as Soup |
| 5 | total = 0 |
| 6 | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/" |
| 7 | |
| 8 | def add_to_total(uri): |
| 9 | global total |
| 10 | headers = requests.head(BASE + uri).headers |
| 11 | if "content-length" in headers: |
| 12 | adding = int(headers["content-length"]) |
| 13 | print "{}: Adding {}".format(uri, size(adding, system=si)) |
| 14 | total += adding |
| 15 | else: |
| 16 | print "{}: No content-length, skipping".format(uri) |
| 17 | |
| 18 | def do_iterate(dir=""): |
| 19 | global BASE |
| 20 | print "Entering {}{}".format(BASE, dir) |
| 21 | for a in Soup(requests.get(BASE + dir).text).findAll('a')[3:]: |
| 22 | if a["href"][-1] == "/": |
| 23 | do_iterate(dir=dir + a["href"]) |
| 24 | else: |
| 25 | add_to_total(dir + a["href"]) |
| 26 | |
| 27 | def main(): |
| 28 | try: |
| 29 | do_iterate() |
| 30 | finally: |
| 31 | print "Total size of {}: {} ({})".format(BASE, total, size(total, system=si)) |
| 32 | return 0 |
| 33 | |
| 34 | if __name__ == "__main__": |
| 35 | sys.exit(main()) |