Now fetching YouTube IDs, but errors on Unicode
author Dylan Lloyd <dylan@psu.edu>
Tue, 25 Jan 2011 02:34:10 +0000 (21:34 -0500)
committer Dylan Lloyd <dylan@psu.edu>
Tue, 25 Jan 2011 02:34:10 +0000 (21:34 -0500)
doit.py [new file with mode: 0755]
youtube.py [new file with mode: 0755]

diff --git a/doit.py b/doit.py
new file mode 100755 (executable)
index 0000000..616d2c9
--- /dev/null
+++ b/doit.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+"""Scrape a Pandora user's station list and thumbed-up tracks."""
+from HTMLParser import HTMLParser
+from htmlentitydefs import name2codepoint
+import urllib
+
+_STATIONS_URL = 'http://www.pandora.com/favorites/profile_tablerows_station.vm?webname='
+_TRACKS_URL = 'http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token='
+
+
+def _utf8(text):
+    """Return text as a UTF-8 byte string; byte strings pass through."""
+    if isinstance(text, unicode):
+        return text.encode('utf-8')
+    return text
+
+
+def _fetch_page(url):
+    """Download url and return its body decoded to unicode.
+
+    The charset comes from the Content-Type header, defaulting to UTF-8.
+    Undecodable bytes are replaced rather than raising.
+    """
+    response = urllib.urlopen(url)
+    try:
+        raw = response.read()
+        charset = response.info().getparam('charset') or 'utf-8'
+    finally:
+        response.close()
+    try:
+        return raw.decode(charset, 'replace')
+    except LookupError:
+        # The server named a codec Python does not know.
+        return raw.decode('utf-8', 'replace')
+
+
+class pandora_fetch(HTMLParser):
+    """Collect a Pandora user's stations and thumbed-up tracks.
+
+    Constructing an instance performs every request. Afterwards
+    ``stations`` lists the station tokens and ``tracks`` maps each
+    ``tracktitle`` attribute to the last text run seen before the anchor
+    of that track entry closes. Both hold UTF-8 byte strings, so they
+    can be handed to urllib.quote_plus without a unicode error.
+    """
+
+    def __init__(self, user):
+        """Fetch the station page for user, then every station's tracks.
+
+        user -- Pandora webname; it is URL-quoted before being sent.
+        """
+        HTMLParser.__init__(self)
+        self.user = user
+        self.stations = []
+        self.tracks = {}
+        self.__in_row = False
+        self.__in_track = False
+        self.__current_track = None
+        self.__text = []  # pieces of the text run being collected
+        self.__mode = 'stations'
+        self._load(_STATIONS_URL + urllib.quote_plus(_utf8(self.user)))
+        self.__mode = 'tracks'
+        for station in self.stations:
+            self._load(_TRACKS_URL + station + '&sort_col=thumbsUpDate')
+
+    def _load(self, url):
+        """Parse the page at url as one self-contained document."""
+        self.feed(_fetch_page(url))
+        self._store_text()
+        # Discard unfinished markup and per-page flags so a truncated page
+        # cannot bleed into the next one.
+        self.reset()
+        self.__in_row = False
+        self.__in_track = False
+        self.__current_track = None
+
+    def _store_text(self):
+        """File the buffered text run under the current track, if any."""
+        if self.__text and self.__in_track:
+            # A span without a tracktitle has no key to file text under.
+            if self.__current_track is not None:
+                title = _utf8(self.__current_track)
+                self.tracks[title] = _utf8(u''.join(self.__text))
+        self.__text = []
+
+    def handle_starttag(self, tag, attrs):
+        """Track station rows and links, or track spans, depending on mode."""
+        self._store_text()
+        if self.__mode == 'stations':
+            if tag == 'div':
+                if ('class', 'station_table_row') in attrs:
+                    self.__in_row = True
+            elif tag == 'a' and self.__in_row:
+                for attr, value in attrs:
+                    # Skip the fixed 10-character href prefix (presumably
+                    # "/stations/" -- confirm); the rest is the token.
+                    token = value[10:] if attr == 'href' and value else ''
+                    if token:
+                        self.stations.append(_utf8(token))
+        elif self.__mode == 'tracks' and tag == 'span':
+            for attr, value in attrs:
+                if attr == 'class' and value == 'track_title':
+                    self.__in_track = True
+                elif attr == 'tracktitle':
+                    self.__current_track = value
+
+    def handle_data(self, text):
+        """Buffer text while inside a track entry."""
+        if self.__in_track:
+            self.__text.append(text)
+
+    def handle_entityref(self, name):
+        """Buffer a named entity such as &amp; as the character it names."""
+        if self.__in_track:
+            code = name2codepoint.get(name)
+            if code is None:
+                self.__text.append(u'&%s;' % name)
+            else:
+                self.__text.append(unichr(code))
+
+    def handle_charref(self, name):
+        """Buffer a numeric reference such as &#233; or &#xE9;."""
+        if self.__in_track:
+            try:
+                if name[:1] in ('x', 'X'):
+                    char = unichr(int(name[1:], 16))
+                else:
+                    char = unichr(int(name))
+            except (ValueError, OverflowError):
+                char = u'&#%s;' % name  # keep unparseable refs verbatim
+            self.__text.append(char)
+
+    def handle_endtag(self, tag):
+        """Close the current station row or track entry."""
+        self._store_text()
+        if tag == 'div':
+            self.__in_row = False
+        elif tag == 'a':
+            self.__in_track = False
+            self.__current_track = None
diff --git a/youtube.py b/youtube.py
new file mode 100755 (executable)
index 0000000..324b2c6
--- /dev/null
+++ b/youtube.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+"""Print a YouTube video ID for each of a Pandora user's liked tracks."""
+
+from HTMLParser import HTMLParser
+import urllib
+from doit import pandora_fetch
+
+USER = 'alphabethos'
+
+
+def _utf8(text):
+    """Return text as a UTF-8 byte string; byte strings pass through."""
+    if isinstance(text, unicode):
+        return text.encode('utf-8')
+    return text
+
+
+def _fetch_page(url):
+    """Download url and return its body decoded to unicode.
+
+    The charset comes from the Content-Type header, defaulting to UTF-8.
+    Undecodable bytes are replaced rather than raising.
+    """
+    response = urllib.urlopen(url)
+    try:
+        raw = response.read()
+        charset = response.info().getparam('charset') or 'utf-8'
+    finally:
+        response.close()
+    try:
+        return raw.decode(charset, 'replace')
+    except LookupError:
+        # The server named a codec Python does not know.
+        return raw.decode('utf-8', 'replace')
+
+
+class search_youtube(HTMLParser):
+    """Search YouTube for each term and collect the matching video IDs.
+
+    Constructing an instance performs every request. Each ID is printed
+    as it is found and appended to ``track_ids``; at most one ID is taken
+    per 'video-main-content' marker seen.
+    """
+
+    def __init__(self, search_terms):
+        """Run one results-page query per entry of search_terms."""
+        HTMLParser.__init__(self)
+        self.track_ids = []
+        self.__in_result = False
+        query = 'http://youtube.com/results?search_query='
+        for search in search_terms:
+            self.__in_result = False
+            # quote_plus raises KeyError on non-ASCII unicode, so hand it
+            # UTF-8 bytes instead.
+            self.feed(_fetch_page(query + urllib.quote_plus(_utf8(search))))
+            # Drop unfinished markup so it cannot leak into the next page.
+            self.reset()
+
+    def handle_starttag(self, tag, attrs):
+        """Record the video ID carried by a result div's id attribute."""
+        if tag != 'div':
+            return
+        track_id = ''
+        for attr, value in attrs:
+            if attr == 'class' and value == 'video-main-content':
+                self.__in_result = True
+            elif attr == 'id' and value:
+                track_id = value
+        # The ID follows a fixed 19-character prefix (presumably
+        # "video-main-content-" -- confirm) and is exactly 11 bytes long.
+        video_id = _utf8(track_id[19:])
+        if self.__in_result and len(video_id) == 11:
+            self.track_ids.append(video_id)
+            print video_id
+            self.__in_result = False
+
+
+user_data = pandora_fetch(USER)
+
+searches = []
+for title, artist in user_data.tracks.iteritems():
+    if title is None or artist is None:
+        continue  # nothing usable to search for
+    searches.append(_utf8(title) + " " + _utf8(artist))
+
+results = search_youtube(searches)