"""Scrape the Bukkit Javadoc index and dump method/class lookup tables.

Downloads the "index-all" page of the Bukkit API docs, collects every method
entry listed there, and writes two dict literals to ``bukjd.py``:

* ``methods`` maps a lower-cased method name to a list of simple class names,
  one list item per matching index entry.
* ``classes`` maps a simple class name to the page path taken from the index
  link (e.g. "org/bukkit/ab/cd.html").

The ``print(...)`` calls take a single argument, so they behave the same on
Python 2 and Python 3.
"""
import requests
from bs4 import BeautifulSoup as Soup

INDEX_URL = "http://jd.bukkit.org/dev/apidocs/index-all.html"
HTML_SUFFIX = ".html"

print("Getting data from jd.bukkit.org...")
# A timeout keeps the script from hanging forever on a stalled server, and
# raise_for_status() stops an HTTP error page from being parsed as the index.
data = requests.get(INDEX_URL, timeout=30)
data.raise_for_status()
# NOTE(review): no parser is named, so bs4 picks one of the installed parsers;
# results may differ between environments. Left as-is on purpose.
soup = Soup(data.text)
# The slice drops the first 7 characters and the last character of the page
# title; presumably that is fixed text around the version -- confirm against
# the live page.
print("Retrieved data. Bukkit version: " + soup.find('title').text[7:-1])

methods = {}
classes = {}

for dt in soup.find_all('dt'):
    links = dt.find_all('a')
    # Only definitions with more than one link (i.e. a class or method
    # definition) are of interest.
    if len(links) < 2:
        continue

    # Class page path in the format "org/bukkit/ab/cd.html": the second
    # link's href with its first two characters removed.
    classn = links[1]["href"][2:]

    bold = dt.find('b')
    if bold is None:
        # No bold label to inspect, so this cannot be a method entry.
        continue
    label = bold.text

    # A method entry has "(" in the first word of its label and starts with a
    # lower-case letter. The "(" test runs first, which also guarantees the
    # label is non-empty before label[0] is read.
    if "(" not in label.split(" ")[0] or not label[0].islower():
        continue

    # Method name without "()" and its arguments, lower-cased.
    method = label.split("(")[0].lower()
    print("Adding %s:%s" % (classn.replace("/", ".")[:-5], method))

    # Derive the simple class name once so that the entries stored in
    # `methods` always match the keys of `classes`.
    class_file = classn.split("/")[-1]
    if class_file.endswith(HTML_SUFFIX):
        class_name = class_file[:-len(HTML_SUFFIX)]
    else:
        class_name = class_file

    # Start a new list the first time a method name is seen, then append.
    methods.setdefault(method, []).append(class_name)

    # Keep the first path recorded for a class name; later duplicates are
    # ignored.
    if class_name not in classes:
        classes[class_name] = classn
    print("Done %s: %s" % (classn, method))

# Save data for copying to bukjdm.py
with open("bukjd.py", "w") as f:
    f.write("methods = " + str(methods) + "\n\n")
    f.write("classes = " + str(classes) + "\n\n")