#!/usr/bin/env python
"""Scrape emoji byte sequences from an online table and save them as JSON.

The script downloads ``SOURCE_URL``, collects every second ``<td class="code">``
cell (starting with the second one), converts the backslash-escaped text of
each cell into raw bytes, keeps only the sequences that are exactly
``EMOJI_BYTE_LENGTH`` bytes long, and writes them to ``OUTPUT_PATH`` as a JSON
list of strings.

NOTE(review): the selected cells are presumably the table's UTF-8 byte column
(text such as ``\\xF0\\x9F\\x98\\x81``) -- confirm against the live page.
"""
import json

from bs4 import BeautifulSoup
from requests import get

SOURCE_URL = "http://apps.timwhitlock.info/emoji/tables/unicode"
OUTPUT_PATH = "emoji.dat"
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server hangs forever
EMOJI_BYTE_LENGTH = 4


def _unescape_bytes(cell_text):
    r"""Return the raw bytes spelled out by escaped text like ``\xF0\x9F``.

    Replaces the Python 2-only ``str.decode("string_escape")`` with a form
    that runs on both Python 2 and 3: ``unicode_escape`` maps every ``\xNN``
    escape to the code point ``U+00NN``, and Latin-1 maps that code point
    straight back to the single byte ``0xNN``.

    Assumes the text contains only ASCII characters and ``\xNN``-style
    escapes; anything else raises ``UnicodeError``.
    """
    return cell_text.encode("ascii").decode("unicode_escape").encode("latin-1")


def main():
    """Download the table, extract the matching sequences, write the JSON file.

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            response (so an error page never overwrites the output file with
            an empty list).
        UnicodeError: if a selected cell is not plain escaped ASCII, or a
            kept byte sequence is not valid UTF-8.
    """
    response = get(SOURCE_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Name the parser explicitly: otherwise bs4 picks whichever one happens
    # to be installed (and warns), so results could vary between machines.
    soup = BeautifulSoup(response.text, "html.parser")
    code_cells = soup.find_all("td", {"class": "code"})

    # Every second "code" cell, starting from the second one.
    byte_sequences = [_unescape_bytes(cell.text) for cell in code_cells[1::2]]

    # Decode to text before dumping: Python 3's json cannot serialise bytes,
    # and Python 2's json would decode them as UTF-8 anyway.
    emojis = [
        raw.decode("utf-8")
        for raw in byte_sequences
        if len(raw) == EMOJI_BYTE_LENGTH
    ]

    # json.dump escapes non-ASCII by default, so the output is pure ASCII.
    with open(OUTPUT_PATH, "w") as out_file:
        json.dump(emojis, out_file)


if __name__ == "__main__":
    main()