Steven Smith revised this gist . Go to revision
1 file changed, 11 insertions, 2 deletions
netflix.py
| @@ -1,8 +1,17 @@ | |||
| 1 | 1 | """ | |
| 2 | 2 | Example usage: | |
| 3 | - | >>> netflix("60024942") | |
| 4 | - | (u'Catch Me If You Can', u'2002', u'Thu Jan 09 08:00:00 UTC 2003', u'M', u'140 minutes', u'http://cdn2.nflximg.net/images/0432/12050432.jpg', u'An FBI agent makes it his mission to put cunning con man Frank Abagnale Jr. behind bars. But Frank not only eludes capture, he revels in the pursuit.', {u'director': u'Steven Spielberg', u'genre': u'Dramas', u'language': u'English', u'starring': u'Leonardo DiCaprio, Tom Hanks'}) | |
| 3 | + | ||
| 4 | + | >>> netflix("60024942") | |
| 5 | + | (u'Catch Me If You Can', u'2002', u'Thu Jan 09 08:00:00 UTC 2003', u'M', u'140 minutes', u'http://cdn2.nflximg.net/images/0432/12050432.jpg', u'An FBI agent makes it his mission to put cunning con man Frank Abagnale Jr. behind bars. But Frank not only eludes capture, he revels in the pursuit.', {u'director': u'Steven Spielberg', u'genre': u'Dramas', u'language': u'English', u'starring': u'Leonardo DiCaprio, Tom Hanks'}) | |
| 6 | + | ||
| 5 | 7 | ID could be obtained through trivial URL parsing using any stdlib library. | |
| 8 | + | Here, I'll do an example: | |
| 9 | + | ||
| 10 | + | from urlparse import urlparse, parse_qs # urllib.parse in python 3 | |
| 11 | + | parse_qs(urlparse("http://www.netflix.com/WiPlayer?movieid=60024942").query) | |
| 12 | + | => {'movieid': ['60024942']} | |
| 13 | + | ||
| 14 | + | Obviously you'd have a much longer URL, but if you store the parse_qs() result in a variable r, you can then use r["movieid"][0] to get the id and pass it to netflix(), tadaaaaa | |
| 6 | 15 | """ | |
| 7 | 16 | ||
| 8 | 17 | def netflix(id): | |
Steven Smith revised this gist . Go to revision
1 file changed, 22 insertions
netflix.py(file created)
| @@ -0,0 +1,22 @@ | |||
| 1 | + | """ | |
| 2 | + | Example usage: | |
| 3 | + | >>> netflix("60024942") | |
| 4 | + | (u'Catch Me If You Can', u'2002', u'Thu Jan 09 08:00:00 UTC 2003', u'M', u'140 minutes', u'http://cdn2.nflximg.net/images/0432/12050432.jpg', u'An FBI agent makes it his mission to put cunning con man Frank Abagnale Jr. behind bars. But Frank not only eludes capture, he revels in the pursuit.', {u'director': u'Steven Spielberg', u'genre': u'Dramas', u'language': u'English', u'starring': u'Leonardo DiCaprio, Tom Hanks'}) | |
| 5 | + | ID could be obtained through trivial URL parsing using any stdlib library. | |
| 6 | + | """ | |
| 7 | + | ||
| 8 | + | def netflix(id): | |
| 9 | + | """ Returns a tuple of strings: (title, year, date-published, MPAA-rating, duration, boxart-url, description, moreinfo) | |
| 10 | + | moreinfo may contain genre, language, actor and director info, depending on what's available""" | |
| 11 | + | soup = Soup(requests.get("http://www.netflix.com/JSON/BOB?movieid=" + id).json()["html"]) | |
| 12 | + | data = ( | |
| 13 | + | soup.find(attrs={'class': 'title'}).text.strip() if soup.find(attrs={'class': 'title'}) else None, | |
| 14 | + | soup.find(attrs={'class': 'year'}).text.strip() if soup.find(attrs={'class': 'year'}) else None, | |
| 15 | + | soup.find(attrs={'itemprop': 'datePublished'})["content"] if soup.find(attrs={'itemprop': 'datePublished'}) else None, | |
| 16 | + | soup.find(attrs={'class': 'mpaaRating'}).text.strip() if soup.find(attrs={'class': 'mpaaRating'}) else None, | |
| 17 | + | soup.find(attrs={'class': 'duration'}).text.strip() if soup.find(attrs={'class': 'duration'}) else None, | |
| 18 | + | soup.find(attrs={'itemprop': 'thumbnailUrl'})["src"] if soup.find(attrs={'itemprop': 'thumbnailUrl'}) else None, | |
| 19 | + | soup.find(attrs={'class', 'boxShot'}).nextSibling.strip(), | |
| 20 | + | {k.text.strip()[:-1].lower(): " ".join(v.text.strip().split()) for k,v in zip(soup.findAll('dt'), soup.findAll('dd'))} | |
| 21 | + | ) | |
| 22 | + | return data | |