parse.py

   1 __author__ = ("Dylan Lloyd <dylan@psu.edu>")
   2 __license__ = "BSD"
   3
   4 # SETTINGS
   5
   6 USER = 'alphabethos'
   7 # END OF SETTINGS
   8
   9 import urllib
  10 from BeautifulSoup import BeautifulSoup
  11
  12 def fetch_stations(user):
  13     stations = []
  14     page = urllib.urlopen('http://www.pandora.com/favorites/profile_tablerows_station.vm?webname=' + USER)
  15     page = BeautifulSoup(page)
  16     table = page.findAll('div', attrs={'class':'station_table_row'})
  17     for row in table:
  18         if row.find('a'):
  19             for attr, value in row.find('a').attrs:
  20                 if attr == 'href':
  21                     stations.append(value[10:])
  22     return stations
  23
  24 def fetch_tracks(stations):
  25     for station in stations:
  26         page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
  27         page = BeautifulSoup(page)
  28         titles = []
  29         artists = []
  30         for span in page.findAll('span', attrs={'class':'track_title'}):
  31             for attr, value in span.attrs:
  32                 if attr == 'tracktitle':
  33                     titles.append(value)
  34         for anchor in page.findAll('a'):
  35             artists.append(anchor.string)
  36         if len(titles) == len(artists):
  37             i = 0
  38             for title in titles:
  39                 print '<a href=\'http://youtube.com/results?search_query=' + urllib.quote_plus(title + ' ' + artists[i]) + '\'>' + title + '</a> by', artists[i], '<br>'
  40                 i += 1
  41         else:
  42             print 'parsing error'
  43
  44 def main():
  45     stations = fetch_stations(USER)
  46     fetch_tracks(stations)
  47
  48 if __name__ ==  "__main__":
  49     main()