Steven Smith revised this gist. Go to revision
1 file changed, 6 insertions, 6 deletions
apache_mirror.py
| @@ -1,11 +1,11 @@ | |||
| 1 | 1 | #!/usr/bin/env python2 | |
| 2 | 2 | # Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS] | |
| 3 | 3 | # If the target site has more (or less) than three columns in the directory index, the second parameter is required. | |
| 4 | - | ||
| 4 | + | ||
| 5 | 5 | import sys | |
| 6 | 6 | import urllib2 | |
| 7 | 7 | from bs4 import BeautifulSoup as Soup | |
| 8 | - | ||
| 8 | + | ||
| 9 | 9 | def do_iterate(curdir="", BASE="", COL="", fh=None): | |
| 10 | 10 | print "Entering {}".format(BASE + curdir) | |
| 11 | 11 | for tr in Soup(urllib2.urlopen(BASE + curdir).read()).findAll('tr')[1:]: | |
| @@ -15,14 +15,14 @@ def do_iterate(curdir="", BASE="", COL="", fh=None): | |||
| 15 | 15 | do_iterate(curdir + tr.a["href"], BASE, COL, fh) | |
| 16 | 16 | else: | |
| 17 | 17 | print "Adding {}".format(BASE + curdir + tr.a["href"]) | |
| 18 | - | fh.write("curl {0}{1} --create-dirs -o {1}\n".format(BASE, curdir+tr.a["href"])) | |
| 19 | - | ||
| 18 | + | fh.write("curl {0}{1} --create-dirs -o \"{2}\"\n".format(BASE, curdir+tr.a["href"], (curdir+tr.a["href"]).replace("%20", " "))) | |
| 19 | + | ||
| 20 | 20 | def main(): | |
| 21 | 21 | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/" | |
| 22 | 22 | COL=int(sys.argv[2]) if len(sys.argv) > 2 else 3 | |
| 23 | 23 | with open("getrepo.bat", "w") as fw: | |
| 24 | 24 | do_iterate("", BASE, COL, fw) | |
| 25 | 25 | return 0 | |
| 26 | - | ||
| 26 | + | ||
| 27 | 27 | if __name__ == "__main__": | |
| 28 | - | sys.exit(main()) | |
| 28 | + | sys.exit(main()) | |
Steven Smith revised this gist. Go to revision
1 file changed, 2 insertions, 3 deletions
apache_mirror_fix_for_windows.py
| @@ -1,10 +1,9 @@ | |||
| 1 | 1 | with open("getrepo.bat") as f: | |
| 2 | - | d = [a.strip().split(" ") for a in f.readlines()] | |
| 2 | + | d = [a.strip().replace("%", "%%").split(" ") for a in f.readlines()] | |
| 3 | 3 | ||
| 4 | 4 | q = [] | |
| 5 | 5 | for a in d: | |
| 6 | - | q.append(" ".join(a[:-1] + [a[-1].replace("/", "\\")])) | |
| 6 | + | q.append(" ".join(a[:a.index("-o")+1] + [x.replace("/", "\\") for x in a[a.index("-o")+1:]])) | |
| 7 | 7 | ||
| 8 | 8 | with open("getrepo.bat", "w") as f: | |
| 9 | 9 | f.write("\n".join(q)) | |
| 10 | - | ||
Steven Smith revised this gist. Go to revision
1 file changed, 10 insertions
apache_mirror_fix_for_windows.py (file created)
| @@ -0,0 +1,10 @@ | |||
| 1 | + | with open("getrepo.bat") as f: | |
| 2 | + | d = [a.strip().split(" ") for a in f.readlines()] | |
| 3 | + | ||
| 4 | + | q = [] | |
| 5 | + | for a in d: | |
| 6 | + | q.append(" ".join(a[:-1] + [a[-1].replace("/", "\\")])) | |
| 7 | + | ||
| 8 | + | with open("getrepo.bat", "w") as f: | |
| 9 | + | f.write("\n".join(q)) | |
| 10 | + | ||
Steven Smith revised this gist. Go to revision
1 file changed, 1 insertion, 1 deletion
apache_mirror.py
| @@ -15,7 +15,7 @@ def do_iterate(curdir="", BASE="", COL="", fh=None): | |||
| 15 | 15 | do_iterate(curdir + tr.a["href"], BASE, COL, fh) | |
| 16 | 16 | else: | |
| 17 | 17 | print "Adding {}".format(BASE + curdir + tr.a["href"]) | |
| 18 | - | fh.write("curl {0}{1} --create-dirs -o ./{1}\n".format(BASE, curdir+tr.a["href"])) | |
| 18 | + | fh.write("curl {0}{1} --create-dirs -o {1}\n".format(BASE, curdir+tr.a["href"])) | |
| 19 | 19 | ||
| 20 | 20 | def main(): | |
| 21 | 21 | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/" | |
Steven Smith revised this gist. Go to revision
1 file changed, 28 insertions
apache_mirror.py (file created)
| @@ -0,0 +1,28 @@ | |||
| 1 | + | #!/usr/bin/env python2 | |
| 2 | + | # Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS] | |
| 3 | + | # If the target site has more (or less) than three columns in the directory index, the second parameter is required. | |
| 4 | + | ||
| 5 | + | import sys | |
| 6 | + | import urllib2 | |
| 7 | + | from bs4 import BeautifulSoup as Soup | |
| 8 | + | ||
| 9 | + | def do_iterate(curdir="", BASE="", COL="", fh=None): | |
| 10 | + | print "Entering {}".format(BASE + curdir) | |
| 11 | + | for tr in Soup(urllib2.urlopen(BASE + curdir).read()).findAll('tr')[1:]: | |
| 12 | + | if tr.img["alt"] == "[PARENTDIR]": | |
| 13 | + | continue | |
| 14 | + | if tr.a["href"][-1] == "/": | |
| 15 | + | do_iterate(curdir + tr.a["href"], BASE, COL, fh) | |
| 16 | + | else: | |
| 17 | + | print "Adding {}".format(BASE + curdir + tr.a["href"]) | |
| 18 | + | fh.write("curl {0}{1} --create-dirs -o ./{1}\n".format(BASE, curdir+tr.a["href"])) | |
| 19 | + | ||
| 20 | + | def main(): | |
| 21 | + | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/" | |
| 22 | + | COL=int(sys.argv[2]) if len(sys.argv) > 2 else 3 | |
| 23 | + | with open("getrepo.bat", "w") as fw: | |
| 24 | + | do_iterate("", BASE, COL, fw) | |
| 25 | + | return 0 | |
| 26 | + | ||
| 27 | + | if __name__ == "__main__": | |
| 28 | + | sys.exit(main()) | |