parse.py

   1 from BeautifulSoup import BeautifulSoup
   2 import urllib
   3 import re
   4
   5 USER = 'alphabethos'
   6
   7 def fetch_stations(user):
   8     tokens = ['0081d3c8e037f4c32a44f01b1701dd31466957fc96e4da2e', 'db592464bbca03e7664b1093336f121ce8c7587b2172781c']
   9     return tokens
  10
  11 def fetch_tracks(stations):
  12     for station in stations:
  13         page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
  14         page = BeautifulSoup(page)
  15         titles = []
  16         artists = []
  17         for span in page.findAll('span', attrs={'class':'track_title'}):
  18             for attr, value in span.attrs:
  19                 if attr == 'tracktitle':
  20                     titles.append(value)
  21         for anchor in page.findAll('a'):
  22             artists.append(anchor.string)
  23         if len(titles) == len(artists):
  24             i = 0
  25             for title in titles:
  26                 print '<a href=\'http://youtube.com/results?search_query=' + urllib.quote_plus(title + ' ' + artists[i]) + '\'>' + title + '</a> by', artists[i], '<br>'
  27                 i += 1
  28         else:
  29             print 'parsing error'
  30
  31 def main():
  32     stations = fetch_stations(USER)
  33     fetch_tracks(stations)
  34
  35 if __name__ ==  "__main__":
  36     main()
  37