"""Scrape the Bukkit Javadoc index and dump method/class lookup tables.

Downloads the "index-all" page, walks every ``<dt>`` entry and builds:

* ``methods`` -- method name (without parentheses) -> list of class paths
  taken from the entry's second link.
* ``classes`` -- last path component of that link (e.g. ``cd.html``) ->
  the full path (e.g. ``org/bukkit/ab/cd.html``).

Both tables are written to ``bukjd.py`` as Python literals.
"""

import requests
from bs4 import BeautifulSoup as Soup

INDEX_URL = "http://jd.bukkit.org/dev/apidocs/index-all.html"
OUTPUT_PATH = "bukjd.py"

data = requests.get(INDEX_URL, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page and
# overwriting the output file with empty tables.
data.raise_for_status()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and bs4 does not warn about guessing).
soup = Soup(data.text, "html.parser")

methods = {}
classes = {}

for dt in soup.find_all("dt"):
    links = dt.find_all("a")
    # Only entries with more than one link (i.e. a class or method
    # definition) carry the second link we need.
    if len(links) < 2:
        continue

    href = links[1].get("href")
    if not href:
        # Anchor without a target: nothing to record for this entry.
        continue

    # Class name in the format "org/bukkit/ab/cd.html"; the slice drops the
    # first two characters of the href (presumably a leading "./").
    classn = href[2:]

    # The bold element holds the entry's label; tolerate entries without one.
    bold = dt.find("b")
    label = bold.text if bold is not None else ""

    # A method label has "(" in its first word and starts with a lowercase
    # letter. The first test guarantees the label is non-empty, so indexing
    # label[0] is safe.
    if "(" in label.split(" ")[0] and label[0].islower():
        # Method name without "()"; group every class path under it.
        method = label.split("(")[0]
        methods.setdefault(method, []).append(classn)

    # Keep only the first path seen for a given file name, so a duplicate
    # class name in another package does not overwrite it.
    classes.setdefault(classn.split("/")[-1], classn)

# Save data for copying to bukjdm.py
with open(OUTPUT_PATH, "w") as f:
    f.write("methods = " + str(methods) + "\n\n")
    f.write("classes = " + str(classes) + "\n\n")