bropages.py
· 746 B · Python
Raw
import errno
from os import mkdir, sep, listdir

import requests
from BeautifulSoup import BeautifulSoup as Soup
# Scrape the command list from bropages.org and save each command's JSON page
# into a local "bropages" directory, one "<command>.json" file per command.

BROWSE_URL = "http://bropages.org/browse"
OUTPUT_DIR = 'bropages'
# Seconds to wait on each HTTP request; without a timeout a stalled server
# would block the script forever.
REQUEST_TIMEOUT = 30


def unique_in_order(names):
    """Return the items of *names* without duplicates, keeping first-seen order."""
    seen = set()
    unique = []
    for name in names:
        if name not in seen:
            seen.add(name)
            unique.append(name)
    return unique


def is_safe_name(cmd):
    """Return True if *cmd* can be used as a plain file name.

    Command names come from a remote page, so anything that is empty, refers
    to a directory entry ('.' / '..') or contains a path separator is rejected
    to keep every written file inside the output directory.
    """
    if not cmd or cmd in ('.', '..'):
        return False
    return '/' not in cmd and sep not in cmd and '\0' not in cmd


def fetch_command_names():
    """Download the browse page and return the distinct command names on it.

    The name is the text of the first <td> of every table row; the first
    <tr> is the table header and is skipped.
    """
    page = requests.get(BROWSE_URL, timeout=REQUEST_TIMEOUT)
    soup = Soup(page.text)
    names = []
    for row in soup.findAll('tr')[1:]:
        cell = row.find('td')
        if cell is not None:  # a row without any <td> carries no command
            names.append(cell.text)
    return unique_in_order(names)


def ensure_output_dir():
    """Create the output directory if it does not exist yet.

    Only the "already exists" error is tolerated; any other OSError
    (permissions, read-only file system, ...) is re-raised.
    """
    try:
        mkdir(OUTPUT_DIR)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise


def save_command_pages(cmds):
    """Fetch the JSON page of every command in *cmds* and write it to disk.

    Commands that cannot be saved (unsafe file name, name that cannot be
    formatted into a path, non-success HTTP status) are reported and skipped
    so that one bad entry does not stop the run.
    """
    for cmd in cmds:
        if not is_safe_name(cmd):
            print("skipped %r (unsafe file name)" % (cmd,))
            continue
        try:
            path = '{}{}{}.json'.format(OUTPUT_DIR, sep, cmd)
            url = 'http://bropages.org/%s.json' % cmd
        except UnicodeError:
            # On Python 2 a non-ASCII name cannot be formatted into a byte
            # string path; keep the original best-effort skip, but say so.
            print("skipped %r (non-ASCII name)" % (cmd,))
            continue
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        if not response.ok:
            # Do not store an error page under a .json name.
            print("skipped %r (HTTP %d)" % (cmd, response.status_code))
            continue
        # Fetch first, open second: a failed request leaves no empty file.
        # Writing the raw bytes avoids any implicit text re-encoding.
        with open(path, 'wb') as f:
            f.write(response.content)
        print("done " + cmd)


def main():
    """Scrape the command list, then download every command's JSON page."""
    cmds = fetch_command_names()
    ensure_output_dir()
    save_command_pages(cmds)


if __name__ == "__main__":
    main()
| 1 | import requests |
| 2 | from os import mkdir, sep, listdir |
| 3 | from BeautifulSoup import BeautifulSoup as Soup |
| 4 | # Load page |
| 5 | soup = Soup(requests.get("http://bropages.org/browse").text) |
| 6 | # Get rows excluding header |
| 7 | rows = soup.findAll('tr')[1:] |
| 8 | cmds = [] |
| 9 | # iterate over rows, get commands, save list without duplicates |
| 10 | for a in rows: |
| 11 | out = a.find('td').text |
| 12 | if not out in cmds: |
| 13 | cmds.append(out) |
| 14 | # make output directory if not already existing |
| 15 | try: |
| 16 | mkdir('bropages') |
| 17 | except: |
| 18 | pass |
| 19 | # get output json, save to directory |
| 20 | for cmd in cmds: |
| 21 | try: |
| 22 | with open('bropages{}{}.json'.format(sep, cmd), 'w') as f: |
| 23 | f.write(requests.get('http://bropages.org/%s.json' % cmd).text) |
| 24 | print "done " + cmd |
| 25 | except UnicodeError: # stupid unicode |
| 26 | continue |