import urllib
from HTMLParser import HTMLParser, HTMLParseError
class pandora_fetch(HTMLParser):
    """Scrape a public Pandora profile for stations and thumbed-up tracks.

    Instantiate with a Pandora account username.  The constructor does all
    the work: it downloads the profile's station table, parses it, and then
    downloads and parses the thumbs-up table of every station found.

    Attributes:
        user: the username given to the constructor.
        stations: list of station tokens, taken from the first link of each
            ``station_table_row`` element.
        tracks: dict of title -> artist pairs.  The key is the value of a
            ``tracktitle`` attribute, the value is the text that follows it
            inside a ``track_title`` element.
            NOTE(review): that the text is the artist comes from the
            original description, not from the markup -- confirm.
    """

    def __init__(self, user):
        """Fetch and parse every page for the Pandora account *user*."""
        HTMLParser.__init__(self)
        self.user = user
        self.stations = []
        self.tracks = {}
        self.__in_row = False
        self.__in_track = False
        self.__current_track = None

        # Pass 1: collect the station tokens from the profile page.
        self.__mode = 'stations'
        self.feed(self._read_url(
            'http://www.pandora.com/favorites/profile_tablerows_station.vm'
            '?webname=' + self.user))

        # Pass 2: collect the thumbed-up tracks of every station.
        self.__mode = 'tracks'
        if not self.stations:
            # Single-argument print(...) emits the same text under the
            # Python 2 print statement.
            print("Are you sure your pandora profile is public? Can't seem "
                  "to find any stations listed with your account.")
        for station in self.stations:
            self.feed(self._read_url(
                'http://www.pandora.com/favorites/'
                'station_tablerows_thumb_up.vm?token=' + station +
                '&sort_col=thumbsUpDate'))

    @staticmethod
    def _read_url(url):
        """Return the body of *url*, always closing the connection."""
        connection = urllib.urlopen(url)
        try:
            return connection.read()
        finally:
            connection.close()

    def handle_starttag(self, tag, attrs):
        """Track parser state for an opening tag, depending on the mode."""
        if self.__mode == 'stations':
            for attr, value in attrs:
                if attr == 'class' and value == 'station_table_row':
                    self.__in_row = True
            if self.__in_row and tag == 'a':
                for attr, value in attrs:
                    # ``value`` is None for a bare ``href`` attribute.
                    if attr == 'href' and value:
                        # Drop the first 10 characters of the link
                        # (presumably a '/stations/' prefix -- confirm).
                        self.stations.append(value[10:])
                        # Only the first link of a row is a station.
                        self.__in_row = False
                        break
        elif self.__mode == 'tracks':
            for attr, value in attrs:
                if attr == 'class' and value == 'track_title':
                    self.__in_track = True
                if attr == 'tracktitle':
                    self.__current_track = value

    def handle_data(self, text):
        """Store *text* under the current track title, if there is one."""
        # Without this guard every text node on every page would be stored
        # under the key None.
        if self.__in_track and self.__current_track is not None:
            self.tracks[self.__current_track] = text

    def handle_endtag(self, tag):
        """Forget the current track once any element closes."""
        self.__in_track = False
        self.__current_track = None
class search_youtube(HTMLParser):
    """Look up YouTube video ids for a list of search terms.

    Instantiate with a list of search terms.  The constructor runs one
    YouTube search per term and parses the result page.

    Attributes:
        track_ids: list of YouTube video ids.  At most one id is recorded
            per search: the first ``/watch?v=`` link seen once the
            ``search-results`` element has been opened.
    """

    def __init__(self, search_terms):
        """Search YouTube for every entry of *search_terms*."""
        # Initialise once up front so the instance is a usable parser even
        # when search_terms is empty.
        HTMLParser.__init__(self)
        self.track_ids = []
        self.__in_search_results = False
        url = 'http://youtube.com/results?search_query='

        for search in search_terms:
            # Re-running the base initialiser resets the parser so it can
            # accept a fresh page; see the note at the end of
            # handle_starttag for why self.reset() is not used.
            HTMLParser.__init__(self)
            self.__in_search_results = False
            try:
                search = urllib.quote_plus(search)
                connection = urllib.urlopen(url + search)
                try:
                    encoding = connection.headers.getparam('charset')
                    page = connection.read()
                finally:
                    connection.close()
                # getparam() returns None when the response declares no
                # charset; fall back to UTF-8 instead of failing in decode.
                self.feed(page.decode(encoding or 'utf-8'))
            except UnicodeDecodeError:
                print('problem decoding ' + url + search)
            except UnicodeEncodeError:
                print('problem encoding ' + url + search)
            except HTMLParseError:
                # There is no way to override HTMLParseError and
                # continue parsing, see:
                # http://bugs.python.org/issue755660
                # But the data is there!
                print('problem parsing ' + url + search)

    def handle_starttag(self, tag, attrs):
        """Record the first video link inside the search results."""
        for attr, value in attrs:
            if attr == 'id' and value == 'search-results':
                self.__in_search_results = True
        if self.__in_search_results:
            for attr, value in attrs:
                # A video link is '/watch?v=' (9 characters) followed by
                # exactly 11 id characters.  ``value`` is None for a bare
                # ``href`` attribute.
                if (attr == 'href' and value
                        and value[:-11] == '/watch?v='
                        and len(value[9:]) == 11):
                    self.track_ids.append(value[9:])
                    # Stop collecting: only the first hit is wanted.
                    self.__in_search_results = False
                    break
        # Calling self.reset() causes the following error:
        #
        #   File "/usr/lib/python2.6/HTMLParser.py", line 108, in feed
        #     self.goahead(0)
        #   File "/usr/lib/python2.6/HTMLParser.py", line 148, in goahead
        #     k = self.parse_starttag(i)
        #   File "/usr/lib/python2.6/HTMLParser.py", line 229, in
        #     parse_starttag
        #     endpos = self.check_for_whole_start_tag(i)
        #   File "/usr/lib/python2.6/HTMLParser.py", line 305, in
        #     check_for_whole_start_tag
        #     raise AssertionError("we should not get here!")
        #
        # I can't figure out why that's happening.  I've discovered that
        # calling HTMLParser.__init__(self) inside the search term loop in
        # self.__init__ also resets the instance.  The instance must be
        # reset to accept a new page with self.feed().  Until a better
        # approach is found, __init__ keeps re-running the base
        # initialiser for every search term.
class found_video(BaseException):
    """Exception class to throw after finding a video to stop HTMLParser.

    It derives from BaseException rather than Exception (presumably so
    that ``except Exception`` handlers do not intercept it -- confirm).
    """