Last active 1440821808

Script to mirror a directory exposed through Apache directory indexing. It creates a batch file of curl commands, each containing a file's URL and its local output path.

Steven Smith revised this gist 1426659805. Go to revision

1 file changed, 6 insertions, 6 deletions

apache_mirror.py

@@ -1,11 +1,11 @@
1 1 #!/usr/bin/env python2
2 2 # Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS]
3 3 # If the target site has more (or less) than three columns in the directory index, the second parameter is required.
4 -
4 +
5 5 import sys
6 6 import urllib2
7 7 from bs4 import BeautifulSoup as Soup
8 -
8 +
9 9 def do_iterate(curdir="", BASE="", COL="", fh=None):
10 10 print "Entering {}".format(BASE + curdir)
11 11 for tr in Soup(urllib2.urlopen(BASE + curdir).read()).findAll('tr')[1:]:
@@ -15,14 +15,14 @@ def do_iterate(curdir="", BASE="", COL="", fh=None):
15 15 do_iterate(curdir + tr.a["href"], BASE, COL, fh)
16 16 else:
17 17 print "Adding {}".format(BASE + curdir + tr.a["href"])
18 - fh.write("curl {0}{1} --create-dirs -o {1}\n".format(BASE, curdir+tr.a["href"]))
19 -
18 + fh.write("curl {0}{1} --create-dirs -o \"{2}\"\n".format(BASE, curdir+tr.a["href"], (curdir+tr.a["href"]).replace("%20", " ")))
19 +
20 20 def main():
21 21 BASE=sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/"
22 22 COL=int(sys.argv[2]) if len(sys.argv) > 2 else 3
23 23 with open("getrepo.bat", "w") as fw:
24 24 do_iterate("", BASE, COL, fw)
25 25 return 0
26 -
26 +
27 27 if __name__ == "__main__":
28 - sys.exit(main())
28 + sys.exit(main())

Steven Smith revised this gist 1426659778. Go to revision

1 file changed, 2 insertions, 3 deletions

apache_mirror_fix_for_windows.py

@@ -1,10 +1,9 @@
1 1 with open("getrepo.bat") as f:
2 - d = [a.strip().split(" ") for a in f.readlines()]
2 + d = [a.strip().replace("%", "%%").split(" ") for a in f.readlines()]
3 3
4 4 q = []
5 5 for a in d:
6 - q.append(" ".join(a[:-1] + [a[-1].replace("/", "\\")]))
6 + q.append(" ".join(a[:a.index("-o")+1] + [x.replace("/", "\\") for x in a[a.index("-o")+1:]]))
7 7
8 8 with open("getrepo.bat", "w") as f:
9 9 f.write("\n".join(q))
10 -

Steven Smith revised this gist 1425871217. Go to revision

1 file changed, 10 insertions

apache_mirror_fix_for_windows.py(file created)

@@ -0,0 +1,10 @@
1 + with open("getrepo.bat") as f:
2 + d = [a.strip().split(" ") for a in f.readlines()]
3 +
4 + q = []
5 + for a in d:
6 + q.append(" ".join(a[:-1] + [a[-1].replace("/", "\\")]))
7 +
8 + with open("getrepo.bat", "w") as f:
9 + f.write("\n".join(q))
10 +

Steven Smith revised this gist 1425870036. Go to revision

1 file changed, 1 insertion, 1 deletion

apache_mirror.py

@@ -15,7 +15,7 @@ def do_iterate(curdir="", BASE="", COL="", fh=None):
15 15 do_iterate(curdir + tr.a["href"], BASE, COL, fh)
16 16 else:
17 17 print "Adding {}".format(BASE + curdir + tr.a["href"])
18 - fh.write("curl {0}{1} --create-dirs -o ./{1}\n".format(BASE, curdir+tr.a["href"]))
18 + fh.write("curl {0}{1} --create-dirs -o {1}\n".format(BASE, curdir+tr.a["href"]))
19 19
20 20 def main():
21 21 BASE=sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/"

Steven Smith revised this gist 1425869599. Go to revision

1 file changed, 28 insertions

apache_mirror.py(file created)

@@ -0,0 +1,28 @@
1 + #!/usr/bin/env python2
2 + # Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS]
3 + # If the target site has more (or less) than three columns in the directory index, the second parameter is required.
4 +
5 + import sys
6 + import urllib2
7 + from bs4 import BeautifulSoup as Soup
8 +
9 + def do_iterate(curdir="", BASE="", COL="", fh=None):
10 + print "Entering {}".format(BASE + curdir)
11 + for tr in Soup(urllib2.urlopen(BASE + curdir).read()).findAll('tr')[1:]:
12 + if tr.img["alt"] == "[PARENTDIR]":
13 + continue
14 + if tr.a["href"][-1] == "/":
15 + do_iterate(curdir + tr.a["href"], BASE, COL, fh)
16 + else:
17 + print "Adding {}".format(BASE + curdir + tr.a["href"])
18 + fh.write("curl {0}{1} --create-dirs -o ./{1}\n".format(BASE, curdir+tr.a["href"]))
19 +
20 + def main():
21 + BASE=sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/"
22 + COL=int(sys.argv[2]) if len(sys.argv) > 2 else 3
23 + with open("getrepo.bat", "w") as fw:
24 + do_iterate("", BASE, COL, fw)
25 + return 0
26 +
27 + if __name__ == "__main__":
28 + sys.exit(main())
Newer Older