# Provenance: recovered from a git patch whose line breaks had been lost.
# From: Dylan Lloyd
# Date: Tue, 25 Jan 2011 02:34:10 +0000 (-0500)
# Subject: Now fetching Youtube ID's, but errors at unicode
# X-Git-Url: https://disinclined.org/git/?a=commitdiff_plain;h=2219258d3c5910ec3e0c941131c1d242cbf51af1;p=i_like_pandora.git
#
# Now fetching Youtube ID's, but errors at unicode
# ---
# diff --git a/doit.py b/doit.py
# new file mode 100755
# index 0000000..616d2c9
# --- /dev/null
# +++ b/doit.py
# @@ -0,0 +1,53 @@
#!/usr/bin/env python
"""Collect a Pandora user's stations and thumbed-up tracks by scraping HTML."""
import urllib

try:  # Python 2 module layout
    from HTMLParser import HTMLParser
    _urlopen = urllib.urlopen
except ImportError:  # Python 3 module layout
    from html.parser import HTMLParser
    from urllib.request import urlopen as _urlopen


def _native_str(text):
    """Return *text* as the interpreter's native ``str`` type.

    Pages are parsed as decoded text, but callers have always received plain
    ``str`` objects.  On Python 2 that means UTF-8 encoded byte strings; on
    Python 3 ``str`` already is text and is returned unchanged.
    """
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):  # only reachable on Python 3
        return text.decode('utf-8', 'replace')
    return text.encode('utf-8')  # Python 2 ``unicode``


class pandora_fetch(HTMLParser):
    """Scrape a Pandora user's stations and the tracks recorded for them.

    Constructing an instance performs all network requests immediately: one
    for the station list of *user*, then one per station found.

    Attributes:
        user: the webname given to the constructor.
        stations: for every ``<a href=...>`` seen inside a
            ``<div class="station_table_row">``, the href with its first
            10 characters removed.
        tracks: maps the ``tracktitle`` attribute of a ``<span>`` to the last
            non-blank text chunk seen between a ``track_title`` span and the
            next ``</a>``.
    """

    _STATIONS_URL = ('http://www.pandora.com/favorites/'
                     'profile_tablerows_station.vm?webname=')
    _TRACKS_URL = ('http://www.pandora.com/favorites/'
                   'station_tablerows_thumb_up.vm?token=')

    def __init__(self, user):
        """Fetch and parse every page for *user* (a Pandora webname)."""
        HTMLParser.__init__(self)
        self.user = user
        self.stations = []
        self.tracks = {}
        self.__in_row = False
        self.__in_track = False
        self.__current_track = None

        self.__mode = 'stations'
        self.__parse(self._STATIONS_URL + self.user)

        self.__mode = 'tracks'
        for station in self.stations:
            self.__parse(
                self._TRACKS_URL + station + '&sort_col=thumbsUpDate')

    def __parse(self, url):
        """Download *url* and run it through the parser as a new document."""
        # Discard whatever an earlier page left behind (an unparsed tail in
        # the parser buffer, an unclosed row or track) so that pages cannot
        # bleed into each other.
        self.reset()
        self.__in_row = False
        self.__in_track = False
        self.__current_track = None
        self.feed(self._fetch(url))

    def _fetch(self, url):
        """Return the body served at *url* as decoded text."""
        response = _urlopen(url)
        try:
            raw = response.read()
        finally:
            response.close()
        return self._decode(raw)

    @staticmethod
    def _decode(raw):
        """Decode response bytes to text without ever raising.

        Feeding undecoded bytes to the parser is what made non-ASCII pages
        fail.  NOTE(review): UTF-8 is assumed; confirm against the charset
        the server actually sends.
        """
        return raw.decode('utf-8', 'replace')

    def handle_starttag(self, tag, attrs):
        """Dispatch an opening tag to the handler for the current mode."""
        if self.__mode == 'stations':
            self.__start_in_stations(tag, attrs)
        elif self.__mode == 'tracks':
            self.__start_in_tracks(tag, attrs)

    def __start_in_stations(self, tag, attrs):
        """Track station rows and collect the link targets inside them."""
        if tag == 'div':
            if ('class', 'station_table_row') in attrs:
                self.__in_row = True
        elif tag == 'a' and self.__in_row:
            for attr, value in attrs:
                # A valueless attribute (<a href>) is reported as None.
                if attr == 'href' and value is not None:
                    self.stations.append(_native_str(value[10:]))

    def __start_in_tracks(self, tag, attrs):
        """Note the start of a track title span and remember its title."""
        if tag != 'span':
            return
        for attr, value in attrs:
            if attr == 'class' and value == 'track_title':
                self.__in_track = True
            elif attr == 'tracktitle':
                if value is None:
                    self.__current_track = None
                else:
                    self.__current_track = _native_str(value)

    def handle_data(self, text):
        """Record *text* for the current track while inside a track."""
        if not self.__in_track or self.__current_track is None:
            # No title to file the text under; never store a None key.
            return
        if not text.strip():
            # Whitespace between tags must not overwrite real text.
            return
        # NOTE(review): later chunks overwrite earlier ones, so on parsers
        # that report character references separately from text only the
        # last chunk before </a> survives.
        self.tracks[self.__current_track] = _native_str(text)

    def handle_endtag(self, tag):
        """Leave the station row on ``</div>`` and the track on ``</a>``."""
        if tag == 'div':
            self.__in_row = False
        if tag == 'a':
            self.__in_track = False
            self.__current_track = None


# Remainder of this patch line (start of the next file's diff header):
# diff --git a/youtube.py b/youtube.py
# new file mode 100755
# index
# Provenance: recovered from a git patch whose line breaks had been lost.
# This line began with the tail of the youtube.py diff header:
# 0000000..324b2c6
# --- /dev/null
# +++ b/youtube.py
# @@ -0,0 +1,46 @@
#!/usr/bin/env python
"""Search YouTube for each track a Pandora user liked and print video IDs."""

import urllib

try:  # Python 2 module layout
    from HTMLParser import HTMLParser
    _urlopen = urllib.urlopen
    _quote_plus = urllib.quote_plus
except ImportError:  # Python 3 module layout
    from html.parser import HTMLParser
    from urllib.parse import quote_plus as _quote_plus
    from urllib.request import urlopen as _urlopen

from doit import pandora_fetch


def _native_str(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 ``unicode`` is encoded to UTF-8 bytes, which is what
    ``urllib.quote_plus`` can handle there (it fails on non-ASCII
    ``unicode``).  On Python 3 bytes are decoded and ``str`` is returned
    unchanged.
    """
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):  # only reachable on Python 3
        return text.decode('utf-8', 'replace')
    return text.encode('utf-8')  # Python 2 ``unicode``


USER = 'alphabethos'
user_data = pandora_fetch(USER)


# One search phrase per track: "<title> <artist>".
searches = []
for title, artist in user_data.tracks.items():
    if title is None or artist is None:
        # Nothing sensible to search for.
        continue
    # Normalise both halves first so byte strings and unicode text are
    # never concatenated with each other.
    searches.append(_native_str(title) + " " + _native_str(artist))


class search_youtube(HTMLParser):
    """Run YouTube searches and collect the video IDs found in the results.

    Constructing an instance performs one request per search term
    immediately.

    Attributes:
        track_ids: the collected IDs, in the order they were found.  Each ID
            is also printed to standard output as it is found.
    """

    _QUERY_URL = 'http://youtube.com/results?search_query='
    # Presumably the id attribute reads "video-main-content-<video id>":
    # the class name plus "-" is exactly 19 characters.  TODO confirm.
    _ID_PREFIX_LEN = 19
    _ID_LEN = 11

    def __init__(self, search_terms):
        """Search for every phrase in *search_terms* (an iterable of text)."""
        HTMLParser.__init__(self)
        self.track_ids = []
        self.__in_result = False
        for search in search_terms:
            # Start each result page as a new document so that leftovers
            # from the previous page cannot leak into this one.
            self.reset()
            self.__in_result = False
            term = _quote_plus(_native_str(search))
            self.feed(self._fetch(self._QUERY_URL + term))

    def _fetch(self, url):
        """Return the body served at *url* as decoded text."""
        response = _urlopen(url)
        try:
            raw = response.read()
        finally:
            response.close()
        # NOTE(review): UTF-8 is assumed; confirm against the charset the
        # server actually sends.
        return raw.decode('utf-8', 'replace')

    def handle_starttag(self, tag, attrs):
        """Collect the video ID carried by a result ``<div>``.

        A div with class ``video-main-content`` marks a pending result.  The
        next div (possibly the same one) whose ``id`` attribute has exactly
        11 characters after the first 19 supplies the ID and clears the mark.
        """
        if tag != 'div':
            return
        track_id = ''
        for attr, value in attrs:
            if attr == 'class' and value == 'video-main-content':
                self.__in_result = True
            # A valueless attribute (<div id>) is reported as None.
            if attr == 'id' and value is not None:
                track_id = value
        video_id = track_id[self._ID_PREFIX_LEN:]
        if self.__in_result and len(video_id) == self._ID_LEN:
            video_id = _native_str(video_id)
            self.track_ids.append(video_id)
            print(video_id)
            self.__in_result = False

    def handle_endtag(self, tag):
        """Closing tags carry no information for this parser."""
        pass


results = search_youtube(searches)