3 from HTMLParser
import HTMLParser
, HTMLParseError
5 from fetch_pandora
import pandora_fetch
class pandora_fetch(HTMLParser):
    """Scrape a public Pandora profile for stations and thumbed-up tracks.

    Instantiate with a Pandora account username. Construction downloads and
    parses the profile pages; afterwards the instance exposes:

    ``stations``
        list of station tokens cut from the station links on the profile.
    ``tracks``
        dict keyed by each track link's ``tracktitle`` attribute, whose value
        is the text found inside that link (described by the original author
        as title -> artist pairs).
    """

    _STATIONS_URL = ('http://www.pandora.com/favorites/'
                     'profile_tablerows_station.vm?webname=')
    _TRACKS_URL = ('http://www.pandora.com/favorites/'
                   'station_tablerows_thumb_up.vm?token=')

    def __init__(self, user):
        """Fetch the stations of *user*, then the liked tracks per station.

        Performs one HTTP request for the station list and one more for
        every station found.
        """
        HTMLParser.__init__(self)
        self.user = user
        self.stations = []
        self.tracks = {}
        self.__in_row = False
        self.__in_track = False
        self.__current_track = None

        # Pass 1: collect station tokens from the profile page.
        self.__mode = 'stations'
        self._feed_url(self._STATIONS_URL + self.user)

        # Pass 2: collect the thumbed-up tracks of every station.
        self.__mode = 'tracks'
        if not self.stations:
            # Single-argument print(...) behaves identically as a Python 2
            # print statement.
            print("Are you sure your pandora profile is public? Can't seem "
                  "to find any stations listed with your account.")
        for station in self.stations:
            self._feed_url(self._TRACKS_URL + station
                           + '&sort_col=thumbsUpDate')

    def _feed_url(self, url):
        """Download *url* and push its body through the parser."""
        connection = urllib.urlopen(url)
        try:
            page = connection.read()
        finally:
            # Release the socket even when the read fails.
            connection.close()
        self.feed(page)

    def handle_starttag(self, tag, attrs):
        """Record station tokens or track titles, depending on the mode."""
        if self.__mode == 'stations':
            for attr, value in attrs:
                if attr == 'class' and value == 'station_table_row':
                    self.__in_row = True
            if self.__in_row and tag == 'a':
                for attr, value in attrs:
                    # A value-less attribute is reported as None; skip it
                    # instead of failing on the slice.
                    if attr == 'href' and value is not None:
                        # Drop the first 10 characters of the link
                        # (presumably a '/stations/'-style prefix; confirm).
                        self.stations.append(value[10:])
                        # Only the first link of a station row is wanted.
                        self.__in_row = False
                        break
        elif self.__mode == 'tracks':
            for attr, value in attrs:
                if attr == 'class' and value == 'track_title':
                    self.__in_track = True
                if attr == 'tracktitle':
                    self.__current_track = value

    def handle_data(self, text):
        """Store *text* under the current track title, if inside a track."""
        # Without this guard every text node of the page would be stored,
        # most of them under the key None.
        if self.__in_track and self.__current_track is not None:
            self.tracks[self.__current_track] = text

    def handle_endtag(self, tag):
        """Leave track context when an element closes."""
        self.__in_track = False
        self.__current_track = None
class search_youtube(HTMLParser):
    """Look up YouTube video ids for a list of search terms.

    Instantiate with an iterable of search strings. Every term is searched
    on YouTube during construction, and the id of the first watch link seen
    after the element whose id is ``search-results`` is recorded. The ids are
    exposed as the list ``self.track_ids``; a term whose page could not be
    decoded or parsed may contribute no id.
    """

    _SEARCH_URL = 'http://youtube.com/results?search_query='
    _WATCH_PREFIX = '/watch?v='
    _VIDEO_ID_LENGTH = 11

    def __init__(self, search_terms):
        """Run one search request per entry of *search_terms*."""
        self.track_ids = []
        for search in search_terms:
            # The instance must be reset before feed() can accept a new
            # page. The original author found that self.reset() led to
            # AssertionError("we should not get here!") raised from
            # check_for_whole_start_tag on Python 2.6, whereas re-running
            # HTMLParser.__init__ resets the instance without that error.
            HTMLParser.__init__(self)
            self.__in_search_results = False

            address = self._SEARCH_URL + urllib.quote_plus(search)
            connection = urllib.urlopen(address)
            try:
                encoding = connection.headers.getparam('charset')
                page = connection.read()
            finally:
                # Release the socket even when the read fails.
                connection.close()

            try:
                # Decode inside the try so the handlers below can catch a
                # decoding failure. getparam() yields None when the response
                # names no charset; UTF-8 is an assumed fallback.
                self.feed(page.decode(encoding or 'utf-8'))
            except UnicodeDecodeError:
                print('problem decoding ' + address)
            except UnicodeEncodeError:
                print('problem encoding ' + address)
            except HTMLParseError:
                # There is no way to override HTMLParseError and continue
                # parsing, see http://bugs.python.org/issue755660
                # Ids collected before the error are kept.
                print('problem parsing ' + address)

    def handle_starttag(self, tag, attrs):
        """Capture the first video id that follows the results container."""
        for attr, value in attrs:
            if attr == 'id' and value == 'search-results':
                self.__in_search_results = True
        if not self.__in_search_results:
            return

        wanted_length = len(self._WATCH_PREFIX) + self._VIDEO_ID_LENGTH
        for attr, value in attrs:
            # A value-less attribute is reported as None; skip it instead
            # of failing on the string operations.
            if (attr == 'href' and value is not None
                    and value.startswith(self._WATCH_PREFIX)
                    and len(value) == wanted_length):
                self.track_ids.append(value[len(self._WATCH_PREFIX):])
                # Ignore further links until a results container is seen
                # again.
                self.__in_search_results = False
                break
class found_video(BaseException):
    """Exception to throw after finding a video, to stop HTMLParser.

    It derives from BaseException rather than Exception, so handlers written
    as ``except Exception`` do not intercept it.
    """