Last active 1440821808

Script to mirror a directory exposed through Apache directory indexing. It creates a batch file of curl commands, each pairing a file's URL with the local output path to save it under.

Revision cac97fe408b5414a1ea8dcb439bd77a602dd4cd8

apache_mirror.py Raw
1#!/usr/bin/env python2
2# Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS]
3# If the target site has more (or fewer) than three columns in the directory index, the second parameter is required.
4
5import sys
6import urllib2
7from bs4 import BeautifulSoup as Soup
8
def do_iterate(curdir="", BASE="", COL="", fh=None):
    """Recursively walk a directory index and write one curl command per file.

    Fetches ``BASE + curdir``, parses the ``<tr>`` rows of the listing, recurses
    into every link whose href ends in "/", and writes a
    ``curl <url> --create-dirs -o <relative path>`` line to *fh* for every
    other link.

    Args:
        curdir: Path of the directory being listed, relative to BASE.
        BASE: Root URL that every relative path is appended to.
        COL: Column count from the command line. It is only forwarded to the
            recursive calls; nothing in this function reads it.
        fh: Writable file-like object receiving the generated commands. The
            default of None is only a placeholder; a real handle is required
            as soon as a file entry is found.
    """
    url = BASE + curdir
    print("Entering {}".format(url))

    # Read the page up front and release the connection before recursing, so
    # a deep tree does not keep one open response per directory level.
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()

    # The first <tr> is skipped; presumably it is the column-header row.
    for tr in Soup(html).findAll('tr')[1:]:
        icon = tr.img
        link = tr.a
        # BeautifulSoup yields None for a missing child tag. Rows without an
        # icon or a link (e.g. <hr> separator rows) are not directory entries,
        # so skip them instead of failing on None["alt"] / None["href"].
        if icon is None or link is None:
            continue
        if icon.get("alt") == "[PARENTDIR]":
            continue
        href = link.get("href")
        if not href:
            continue
        if href.endswith("/"):
            do_iterate(curdir + href, BASE, COL, fh)
        else:
            target = curdir + href
            print("Adding {}".format(BASE + target))
            # NOTE(review): the href comes from the remote server and is
            # written into a shell/batch command unquoted; review the
            # generated file before running it against an untrusted host.
            fh.write("curl {0}{1} --create-dirs -o {1}\n".format(BASE, target))
19
def main():
    """Build getrepo.bat from the directory index named on the command line.

    The first argument is the base URL (default "http://repo.blha303.biz/")
    and the second is the column count, converted with int() (default 3).
    Both are handed to do_iterate together with the open output file.

    Returns:
        0, which the caller uses as the process exit status.
    """
    argv = sys.argv

    base_url = "http://repo.blha303.biz/"
    if len(argv) > 1:
        base_url = argv[1]

    columns = 3
    if len(argv) > 2:
        columns = int(argv[2])

    with open("getrepo.bat", "w") as batch:
        do_iterate("", base_url, columns, batch)
    return 0
26
if __name__ == "__main__":
    # Exit with whatever status main() returns.
    raise SystemExit(main())
29
apache_mirror_fix_for_windows.py Raw
# Rewrites getrepo.bat in place so the generated curl commands suit a Windows
# batch file: percent signs are doubled and the output path uses backslashes.


def convert_line(line):
    """Return one command line adapted for a Windows batch file.

    Surrounding whitespace is stripped and every "%" is doubled. If the line
    contains a standalone ``-o`` token, "/" is replaced by "\\" in every
    token after it (the output path); tokens up to and including ``-o`` are
    left alone so the URL keeps its forward slashes.

    Lines without a ``-o`` token (for example blank lines) are returned with
    only the percent doubling applied, rather than raising ValueError.
    """
    tokens = line.strip().replace("%", "%%").split(" ")
    if "-o" not in tokens:
        return " ".join(tokens)
    split_at = tokens.index("-o") + 1
    output_args = [tok.replace("/", "\\") for tok in tokens[split_at:]]
    return " ".join(tokens[:split_at] + output_args)


def fix_batch_file(path="getrepo.bat"):
    """Convert every line of the batch file at *path* in place.

    The whole file is read and closed before it is reopened for writing.
    Lines are joined with "\\n" and no trailing newline is written.
    """
    with open(path) as src:
        converted = [convert_line(line) for line in src]

    with open(path, "w") as dst:
        dst.write("\n".join(converted))


if __name__ == "__main__":
    fix_batch_file()
10