Last active 1440821808

Script to mirror a directory exposed through Apache directory indexing. Creates a batch file containing one curl command per file, each with the file's URL and its local output path.

Revision dc2f51c34796d33af6a49ea0eb46e27ec8e02514

apache_mirror.py Raw
#!/usr/bin/env python2
# Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS]
# If the target site's directory index has more (or fewer) than three columns, the second parameter is required.
4
import sys
import urllib2
from contextlib import closing

from bs4 import BeautifulSoup as Soup
8
def do_iterate(curdir="", BASE="", COL="", fh=None):
    """Recursively walk a directory index page and emit curl commands.

    Fetches ``BASE + curdir``, parses the page with BeautifulSoup and looks
    at every ``<tr>`` after the first one.  A row whose link ends in ``/``
    is treated as a sub-directory and descended into; any other row yields
    one ``curl ... --create-dirs -o ./<path>`` line written to *fh*.

    Args:
        curdir: Path of the directory to list, relative to BASE.
        BASE: Base URL that every relative path is appended to.
        COL: Column count taken from the command line.  It is passed down
            the recursion but never read by this function.
        fh: Writable file-like object that receives the curl commands.
    """
    url = BASE + curdir
    print("Entering {}".format(url))

    # Python 2's urlopen() result is not a context manager, so closing()
    # is used to release the connection before recursing any deeper.
    with closing(urllib2.urlopen(url)) as response:
        page = response.read()

    # The first <tr> is skipped, as in the original (presumably the column
    # header row -- confirm against the target site's markup).
    for tr in Soup(page).findAll('tr')[1:]:
        icon = tr.img
        link = tr.a
        # Rows without an icon or a link (e.g. <hr> separator rows) carry no
        # entry; indexing into None here used to abort the whole crawl.
        if icon is None or link is None:
            continue
        if icon.get("alt") == "[PARENTDIR]":
            continue
        href = link.get("href")
        if not href:
            continue

        if href.endswith("/"):
            do_iterate(curdir + href, BASE, COL, fh)
        else:
            print("Adding {}".format(url + href))
            # NOTE(review): URL and output path are written unquoted, so
            # names containing shell/batch metacharacters may need escaping.
            fh.write("curl {0}{1} --create-dirs -o ./{1}\n".format(BASE, curdir + href))
19
def main():
    """Parse the command line and write the curl commands to ``getrepo.bat``.

    ``sys.argv[1]`` is the base URL (default ``http://repo.blha303.biz/``)
    and ``sys.argv[2]`` the column count (default 3).  The output file is
    created in the current working directory and overwritten if it exists.

    Returns:
        0, which the caller uses as the process exit status.
    """
    BASE = sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/"
    # URLs are built by plain string concatenation (BASE + path), so a base
    # given without a trailing slash would fuse with the first path segment.
    if not BASE.endswith("/"):
        BASE += "/"
    COL = int(sys.argv[2]) if len(sys.argv) > 2 else 3

    with open("getrepo.bat", "w") as fw:
        do_iterate("", BASE, COL, fw)
    return 0
26
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
29