apache_index_total.py
· 1.1 KiB · Python
Raw
import sys
import requests
from hurry.filesize import size, si
from bs4 import BeautifulSoup as Soup
# Running sum of every content-length value seen so far; add_to_total()
# updates it through ``global`` and main() reports it when the crawl ends.
total = 0
def add_to_total(uri, BASE=""):
    """Add the size reported for ``BASE + uri`` to the global ``total``.

    Sends an HTTP HEAD request and reads the ``content-length`` response
    header. If the header is missing the resource is skipped and ``total``
    is left unchanged. One line describing the outcome is printed either way.

    Args:
        uri: Path of the resource, appended to ``BASE`` to form the URL.
        BASE: URL prefix that ``uri`` is relative to.
    """
    global total
    reported = requests.head(BASE + uri).headers.get("content-length")
    if reported is None:
        print("{}: No content-length, skipping".format(uri))
        return
    adding = int(reported)
    print("{}: Adding {}".format(uri, size(adding, system=si)))
    total += adding
def do_iterate(dir="", BASE="", COL=""):
    """Recursively walk an HTML index page and total the files it links to.

    Fetches ``BASE + dir``, collects every ``<a>`` element, drops the first
    ``COL`` of them, and then for each remaining link either recurses (href
    ends with ``/``) or hands the link to ``add_to_total``.

    Args:
        dir: Path of the listing to fetch, relative to ``BASE``.
        BASE: URL prefix shared by every request in the crawl.
        COL: Number of leading anchors to ignore on each listing page
            (presumably header/sort/parent links -- confirm against the
            target server's index layout). May be an int, a numeric string
            such as a raw command-line argument, or empty/None for "skip
            nothing".

    Raises:
        ValueError: If ``COL`` is a non-empty, non-numeric string.
    """
    # Slice bounds must be integers; the default "" and command-line strings
    # would otherwise raise TypeError in the slice below.
    skip = int(COL) if COL else 0

    print("Entering {}{}".format(BASE, dir))
    page = requests.get(BASE + dir).text
    for a in Soup(page).findAll('a')[skip:]:
        href = a.get("href")
        if not href:
            # Anchors without a usable href (e.g. named anchors) carry no
            # target to follow; indexing them would raise KeyError/IndexError.
            continue
        if href.endswith("/"):
            do_iterate(dir + href, BASE, skip)
        else:
            add_to_total(dir + href, BASE)
def main():
    """Crawl an index site from the command line and print its total size.

    Command-line arguments (both optional):
        argv[1]: Base URL to crawl; defaults to ``http://file.cite.wa.edu.au/``.
        argv[2]: Number of leading links to skip on each listing page;
            defaults to 3.

    Returns:
        0 after a completed crawl, 2 if argv[2] is not an integer.
    """
    BASE = sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/"
    if len(sys.argv) > 2:
        # argv values are strings, but do_iterate uses COL as a slice bound,
        # so convert here and reject anything that is not an integer.
        try:
            COL = int(sys.argv[2])
        except ValueError:
            sys.stderr.write(
                "COL must be an integer, got {!r}\n".format(sys.argv[2])
            )
            return 2
    else:
        COL = 3

    try:
        do_iterate("", BASE, COL)
    finally:
        # Report whatever was counted even if the crawl is interrupted or
        # fails part-way through; the exception still propagates afterwards.
        print("Total size of {}: {} ({})".format(BASE, total, size(total, system=si)))
    return 0
if __name__ == "__main__":
    # Run the crawl only when executed as a script and use main()'s return
    # value as the process exit status.
    raise SystemExit(main())
| 1 | import sys |
| 2 | import requests |
| 3 | from hurry.filesize import size, si |
| 4 | from bs4 import BeautifulSoup as Soup |
| 5 | total = 0 |
| 6 | |
| 7 | def add_to_total(uri, BASE=""): |
| 8 | global total |
| 9 | headers = requests.head(BASE + uri).headers |
| 10 | if "content-length" in headers: |
| 11 | adding = int(headers["content-length"]) |
| 12 | print "{}: Adding {}".format(uri, size(adding, system=si)) |
| 13 | total += adding |
| 14 | else: |
| 15 | print "{}: No content-length, skipping".format(uri) |
| 16 | |
| 17 | def do_iterate(dir="", BASE="", COL=""): |
| 18 | print "Entering {}{}".format(BASE, dir) |
| 19 | for a in Soup(requests.get(BASE + dir).text).findAll('a')[COL:]: |
| 20 | if a["href"][-1] == "/": |
| 21 | do_iterate(dir + a["href"], BASE, COL) |
| 22 | else: |
| 23 | add_to_total(dir + a["href"], BASE) |
| 24 | |
| 25 | def main(): |
| 26 | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://file.cite.wa.edu.au/" |
| 27 | COL=sys.argv[2] if len(sys.argv) > 2 else 3 |
| 28 | try: |
| 29 | do_iterate("", BASE, COL) |
| 30 | finally: |
| 31 | print "Total size of {}: {} ({})".format(BASE, total, size(total, system=si)) |
| 32 | return 0 |
| 33 | |
| 34 | if __name__ == "__main__": |
| 35 | sys.exit(main()) |