apache_mirror.py
                        
                             · 1.1 KiB · Python
                        
                    
                    
                      
                        Raw
                      
                      
                        
                          
                        
                    
                    
                
                
            #!/usr/bin/env python2
# Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS]
# If the target site has more (or fewer) than three columns in the directory index, the second parameter is required.
 
import sys
import urllib2
from bs4 import BeautifulSoup as Soup
 
def do_iterate(curdir="", BASE="", COL="", fh=None):
    """Recursively walk a directory index page and emit one curl command per file.

    Fetches the page at ``BASE + curdir``, skips the first ``<tr>`` row, and
    for every remaining row either recurses (link ends with "/") or writes a
    ``curl ... --create-dirs -o "<path>"`` line to ``fh``.

    curdir -- path of the directory being listed, relative to BASE
    BASE   -- URL prefix that is concatenated in front of every path
    COL    -- only forwarded to recursive calls; not otherwise used here
    fh     -- writable file-like object receiving the generated commands
    """
    print("Entering {}".format(BASE + curdir))
    # Read the whole page and close the connection before recursing, so a
    # deep tree does not keep one open socket per directory level.
    response = urllib2.urlopen(BASE + curdir)
    try:
        page = response.read()
    finally:
        response.close()
    for tr in Soup(page).findAll('tr')[1:]:
        # Rows without an icon or a link (e.g. <hr> separators) have nothing
        # to download; subscripting the missing tag would raise TypeError.
        if tr.img is None or tr.a is None:
            continue
        if tr.img.get("alt") == "[PARENTDIR]":
            continue
        href = tr.a.get("href")
        if not href:
            continue
        path = curdir + href
        if href[-1] == "/":
            do_iterate(path, BASE, COL, fh)
        else:
            print("Adding {}".format(BASE + path))
            # NOTE(review): the URL is written unquoted and only "%20" is
            # decoded in the output name; links containing shell
            # metacharacters (e.g. "&") will need manual fixing.
            fh.write("curl {0}{1} --create-dirs -o \"{2}\"\n".format(BASE, path, path.replace("%20", " ")))
 
def main():
    """Command-line entry point: crawl the index and write getrepo.bat.

    argv[1] -- base URL to crawl (defaults to "http://repo.blha303.biz/")
    argv[2] -- column count, parsed as int (defaults to 3)

    Returns 0 after the crawl finishes; the generated curl commands are
    written to "getrepo.bat" in the current directory.
    """
    BASE = sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/"
    # do_iterate joins BASE, the directory path and the link by plain string
    # concatenation, so a base without a trailing slash produces bad URLs.
    if not BASE.endswith("/"):
        BASE += "/"
    COL = int(sys.argv[2]) if len(sys.argv) > 2 else 3
    with open("getrepo.bat", "w") as fw:
        do_iterate("", BASE, COL, fw)
    return 0
 
# Script entry: main()'s return value is handed to sys.exit as the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
                | 1 | #!/usr/bin/env python2 | 
| 2 | # Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS] | 
| 3 | # If the target site has more (or less) than three columns in the directory index, the second parameter is required. | 
| 4 | |
| 5 | import sys | 
| 6 | import urllib2 | 
| 7 | from bs4 import BeautifulSoup as Soup | 
| 8 | |
| 9 | def do_iterate(curdir="", BASE="", COL="", fh=None): | 
| 10 | print "Entering {}".format(BASE + curdir) | 
| 11 | for tr in Soup(urllib2.urlopen(BASE + curdir).read()).findAll('tr')[1:]: | 
| 12 | if tr.img["alt"] == "[PARENTDIR]": | 
| 13 | continue | 
| 14 | if tr.a["href"][-1] == "/": | 
| 15 | do_iterate(curdir + tr.a["href"], BASE, COL, fh) | 
| 16 | else: | 
| 17 | print "Adding {}".format(BASE + curdir + tr.a["href"]) | 
| 18 | fh.write("curl {0}{1} --create-dirs -o \"{2}\"\n".format(BASE, curdir+tr.a["href"], (curdir+tr.a["href"]).replace("%20", " "))) | 
| 19 | |
| 20 | def main(): | 
| 21 | BASE=sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/" | 
| 22 | COL=int(sys.argv[2]) if len(sys.argv) > 2 else 3 | 
| 23 | with open("getrepo.bat", "w") as fw: | 
| 24 | do_iterate("", BASE, COL, fw) | 
| 25 | return 0 | 
| 26 | |
| 27 | if __name__ == "__main__": | 
| 28 | sys.exit(main()) | 
                    
                        
                        apache_mirror_fix_for_windows.py
                        
                             · 279 B · Python
                        
                    
                    
                      
                        Raw
                      
                      
                        
                          
                        
                    
                    
                
                
            # Rewrite getrepo.bat in place: double every "%" and, in everything
            # after the "-o" token, turn "/" into "\".
            with open("getrepo.bat") as src:
                token_rows = []
                for raw_line in src:
                    escaped = raw_line.strip().replace("%", "%%")
                    token_rows.append(escaped.split(" "))

            fixed_lines = []
            for tokens in token_rows:
                # Everything up to and including "-o" is kept verbatim.
                split_at = tokens.index("-o") + 1
                kept = tokens[:split_at]
                converted = [tok.replace("/", "\\") for tok in tokens[split_at:]]
                fixed_lines.append(" ".join(kept + converted))

            with open("getrepo.bat", "w") as dst:
                dst.write("\n".join(fixed_lines))
                | 1 | with open("getrepo.bat") as f: | 
| 2 | d = [a.strip().replace("%", "%%").split(" ") for a in f.readlines()] | 
| 3 | |
| 4 | q = [] | 
| 5 | for a in d: | 
| 6 | q.append(" ".join(a[:a.index("-o")+1] + [x.replace("/", "\\") for x in a[a.index("-o")+1:]])) | 
| 7 | |
| 8 | with open("getrepo.bat", "w") as f: | 
| 9 | f.write("\n".join(q)) | 
| 10 |