HOTFIX: deleted legacy line
[i_like_pandora.git] / search_classes.py
1 #!/usr/bin/env python
2
3 from HTMLParser import HTMLParser, HTMLParseError
4 import urllib
5
class pandora_fetch(HTMLParser):
    """Scrape the stations and thumbed-up tracks of a public Pandora profile.

    Instantiate with a Pandora account username. Construction downloads the
    profile's station list and then, for every station found, that station's
    thumbs-up page, feeding each page through this parser.

    Attributes:
        user: the username passed to the constructor.
        stations: list of station tokens, taken from the ``href`` of links
            found inside ``station_table_row`` divs.
        tracks: dict keyed by the ``tracktitle`` attribute of each
            ``track_title`` span; the value is the text found inside that
            span (described by the original author as title -> artist).
    """

    _STATIONS_URL = 'http://www.pandora.com/favorites/profile_tablerows_station.vm?webname='
    _THUMBS_UP_URL = 'http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token='

    def __init__(self, user):
        """Fetch and parse the profile of *user*; results land on the instance."""
        HTMLParser.__init__(self)
        self.user = user
        self.stations = []
        self.tracks = {}
        self.__in_row = False
        self.__in_track = False
        self.__current_track = None

        # Pass 1: collect the station tokens from the profile page.
        self.__mode = 'stations'
        self.feed(self._fetch_page(self._STATIONS_URL + self.user))

        # Pass 2: collect the thumbed-up tracks of every station.
        self.__mode = 'tracks'
        if not self.stations:
            print("Are you sure your pandora profile is public? "
                  "Can't seem to find any stations listed with your account.")
            # __init__ must return None; returning a value raises TypeError.
            return
        for station in self.stations:
            url = self._THUMBS_UP_URL + station + '&sort_col=thumbsUpDate'
            self.feed(self._fetch_page(url))

    def _fetch_page(self, url):
        """Download *url* and return the raw body, always closing the connection."""
        connection = urllib.urlopen(url)
        try:
            return connection.read()
        finally:
            connection.close()

    def handle_starttag(self, tag, attrs):
        """Track station rows / track spans depending on the current mode."""
        if self.__mode == 'stations':
            if tag == 'div':
                for attr, value in attrs:
                    if attr == 'class' and value == 'station_table_row':
                        self.__in_row = True
            if self.__in_row and tag == 'a':
                for attr, value in attrs:
                    # A valueless attribute is reported as None; skip it
                    # instead of failing on the slice.
                    if attr == 'href' and value is not None:
                        # Drop the first 10 characters of the link
                        # (presumably a '/stations/' prefix -- TODO confirm).
                        self.stations.append(value[10:])
        if self.__mode == 'tracks':
            if tag == 'span':
                for attr, value in attrs:
                    if attr == 'class' and value == 'track_title':
                        self.__in_track = True
                        continue
                    if attr == 'tracktitle':
                        self.__current_track = value

    def handle_data(self, text):
        """Record text seen inside a track span under the current track title."""
        if self.__in_track:
            self.tracks[self.__current_track] = text

    def handle_endtag(self, tag):
        """Leave the station row on </div> and the track on </a>."""
        if tag == 'div':
            self.__in_row = False
        if tag == 'a':
            self.__in_track = False
            self.__current_track = None
60
61
class search_youtube(HTMLParser):
    """Collect a YouTube video id for each search term.

    Instantiate with an iterable of search terms. For every term the YouTube
    results page is downloaded and parsed; parsing stops at the first
    ``/watch?v=<11 characters>`` link found after the ``search-results`` div.

    Attributes:
        track_ids: list of the video ids found, in search order (at most one
            per search term, since parsing is aborted after the first hit).
    """

    def __init__(self, search_terms):
        """Run one search per entry of *search_terms* and fill ``track_ids``."""
        # Initialise the parser up front so the instance is usable even when
        # search_terms is empty.
        HTMLParser.__init__(self)
        self.track_ids = []
        self.__in_search_results = False
        for search in search_terms:
            # The previous feed() may have been aborted mid-page by
            # found_video, so start every page from a clean parser state.
            self.reset()
            self.__in_search_results = False
            url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(search)
            try:
                # Decoding happens inside the try so that a bad page is
                # reported and skipped rather than aborting all searches.
                self.feed(self._fetch_page(url))
            except UnicodeDecodeError:
                print('problem decoding ' + url)
            except UnicodeEncodeError:
                print('problem encoding ' + url)
            except HTMLParseError:
                # There is no way to override HTMLParseError and
                # continue parsing, see:
                # http://bugs.python.org/issue755660
                # But the data is there!
                print('problem parsing ' + url)
            except found_video:
                # Raised by handle_starttag once an id has been stored.
                pass

    def _fetch_page(self, url):
        """Download *url* and return its body decoded to text.

        The charset comes from the response headers; when the server does not
        declare one, UTF-8 is assumed. The connection is always closed.
        """
        connection = urllib.urlopen(url)
        try:
            encoding = connection.headers.getparam('charset') or 'utf-8'
            raw_page = connection.read()
        finally:
            connection.close()
        return raw_page.decode(encoding)

    def handle_starttag(self, tag, attrs):
        """Store the first video link seen after the search-results div.

        Raises:
            found_video: right after a video id has been appended, to stop
                the parser from processing the rest of the page.
        """
        if tag == 'div':
            for attr, value in attrs:
                if attr == 'id' and value == 'search-results':
                    self.__in_search_results = True
        if not self.__in_search_results:
            return
        for attr, value in attrs:
            # Valueless attributes are reported as None; ignore them.
            if attr != 'href' or value is None:
                continue
            # A video link is exactly '/watch?v=' (9 chars) + an 11-char id.
            if value.startswith('/watch?v=') and len(value) == 20:
                self.track_ids.append(value[9:])
                self.__in_search_results = False
                # Calling self.reset() here, in the middle of feed(), fails
                # inside HTMLParser.check_for_whole_start_tag with
                # AssertionError("we should not get here!") (observed by the
                # original author on Python 2.6). The parser is therefore
                # reset from __init__ before the next page, and parsing of
                # the current page is aborted by raising instead.
                raise found_video
118
class found_video(BaseException):
    """Signal raised as soon as a video has been found, to make HTMLParser stop."""