#!/usr/bin/env python2 # Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS] # If the target site has more (or less) than three columns in the directory index, the second parameter is required. import sys import urllib2 from bs4 import BeautifulSoup as Soup def do_iterate(curdir="", BASE="", COL="", fh=None): print "Entering {}".format(BASE + curdir) for tr in Soup(urllib2.urlopen(BASE + curdir).read()).findAll('tr')[1:]: if tr.img["alt"] == "[PARENTDIR]": continue if tr.a["href"][-1] == "/": do_iterate(curdir + tr.a["href"], BASE, COL, fh) else: print "Adding {}".format(BASE + curdir + tr.a["href"]) fh.write("curl {0}{1} --create-dirs -o \"{2}\"\n".format(BASE, curdir+tr.a["href"], (curdir+tr.a["href"]).replace("%20", " "))) def main(): BASE=sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/" COL=int(sys.argv[2]) if len(sys.argv) > 2 else 3 with open("getrepo.bat", "w") as fw: do_iterate("", BASE, COL, fw) return 0 if __name__ == "__main__": sys.exit(main())