likes_pandora.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 __author__ = ("Dylan Lloyd <dylan@psu.edu>")
   5 __license__ = "BSD"
   6
   7 # SETTINGS
   8
   9 USER = 'alphabethos' # pandora account name http://pandora.com/people<USER>
  10 DIR = '/home/dylan/pandora/' # where to download the videos - will not be automatically created
  11 YT_DL = '/usr/bin/youtube-dl' # Path to youtube-dl
  12 NOTIFICATIONS = True # False
  13 DEFAULT_ICON ='/usr/share/icons/gnome/48x48/mimetypes/gnome-mime-application-x-shockwave-flash.png' # for notifications
  14 YT_OPT = '--no-progress --ignore-errors --continue --max-quality=22 -o "%(stitle)s---%(id)s.%(ext)s"'
  15 # END OF SETTINGS
  16
  17 from BeautifulSoup import BeautifulSoup
  18 import urllib
  19 import urllib2
  20 import os
  21 import re
  22 import copy
  23 import shlex, subprocess
  24
  25 if NOTIFICATIONS:
  26     import pynotify
  27     import hashlib
  28     import tempfile
  29     import string
  30
  31 def fetch_stations(user):
  32     """ This takes a pandora username and returns the a list of the station tokens that the user is subscribed to. """
  33     stations = []
  34     page = urllib.urlopen('http://www.pandora.com/favorites/profile_tablerows_station.vm?webname=' + USER)
  35     page = BeautifulSoup(page)
  36     table = page.findAll('div', attrs={'class':'station_table_row'})
  37     for row in table:
  38         if row.find('a'):
  39             for attr, value in row.find('a').attrs:
  40                 if attr == 'href':
  41                     stations.append(value[10:])
  42     return stations
  43
  44 def fetch_tracks(stations):
  45     """ Takes a list of station tokens and returns a list of youtube search urls.
  46         What this should really do is just return the Title + Artist strings.
  47     """
  48     search_strings = []
  49     for station in stations:
  50         page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
  51         page = BeautifulSoup(page)
  52         titles = []
  53         artists = []
  54         for span in page.findAll('span', attrs={'class':'track_title'}):
  55             for attr, value in span.attrs:
  56                 if attr == 'tracktitle':
  57                     titles.append(value)
  58         for anchor in page.findAll('a'):
  59             artists.append(anchor.string)
  60         if len(titles) == len(artists):
  61             i = 0
  62             for title in titles:
  63                 search_string = title + ' ' + artists[i]
  64                 search_strings.append(search_string)
  65                 i += 1
  66         else:
  67            pass  ## ERROR
  68     return search_strings
  69
  70 def fetch_search_video_ids(search_strings):
  71     """ This takes a list of youtube search urls and tries to find the first result. It returns a list of youtube video ids.
  72         It really should take a list of ids instead.
  73     """
  74     video_list = []
  75     for search_string in search_strings:
  76         search_url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(search_string)
  77         page = urllib.urlopen(search_url)
  78         page = BeautifulSoup(page)
  79         result = page.find('div', attrs={'class':'video-main-content'})
  80         print result
  81         if result == None:
  82             print 'odd feedback for search, could not find div at ', search_url
  83             continue
  84         for attr, value in result.attrs:
  85             if attr == 'id' and len(value[19:]) == 11:
  86                 video_list.append(value[19:])
  87             elif attr == 'id':
  88                 print 'odd feedback for search', search_url, " : ", value[19:]
  89     return video_list
  90
  91
  92 def check_for_existing(video_list):
  93     """ Checks the download-folder for existing videos with same id and removes from video_list. """
  94     filelist = os.listdir(DIR)
  95     i = 0
  96     for video in copy.deepcopy(video_list):
  97         for files in filelist:
  98             if re.search(video,files):
  99                 del video_list[i]
 100                 i -= 1
 101         i += 1
 102     return video_list
 103
 104 def fetch_videos(videolist):
 105     """ Uses subprocess to trigger a download using youtube-dl of the list created earlier, and triggers notifications if enabled. """
 106     os.chdir(DIR)
 107     args = shlex.split(YT_DL + ' ' + YT_OPT)
 108     if NOTIFICATIONS: regex = re.compile("\[download\] Destination: (.+)")
 109     for item in videolist:
 110         if item:
 111             thread = subprocess.Popen(args + ["http://youtube.com/watch?v=" + item], stdout=subprocess.PIPE)
 112             output = thread.stdout.read()
 113             if NOTIFICATIONS:
 114                 video_file = regex.findall(output)
 115                 if len(video_file) == 0:
 116                     break
 117                 thumbnail = hashlib.md5('file://' + DIR + video_file[0]).hexdigest() + '.png'
 118                 # Two '/'s instead of three because the path is
 119                 # absolute; I'm not sure how this'd work on windows.
 120                 title, sep, vid_id = video_file[0].rpartition('---')
 121                 title = string.replace(title, '_', ' ')
 122                 thumbnail = os.path.join(os.path.expanduser('~/.thumbnails/normal'), thumbnail)
 123                 if not os.path.isfile(thumbnail):
 124                     opener = urllib2.build_opener()
 125                     try:
 126                         page = opener.open('http://img.youtube.com/vi/' + item + '/1.jpg')
 127                         thumb = page.read()
 128                         # The thumbnail really should be saved to
 129                         # ~/.thumbnails/normal (Thumbnail Managing
 130                         # Standard)
 131                         # [http://jens.triq.net/thumbnail-spec/]
 132                         # As others have had problems anyway
 133                         # (http://mail.gnome.org/archives/gnome-list/2010-October/msg00009.html)
 134                         # I decided not to bother at the moment.
 135                         temp = tempfile.NamedTemporaryFile(suffix='.jpg')
 136                         temp.write(thumb)
 137                         temp.flush()
 138                         note = pynotify.Notification(title, 'video downloaded', temp.name)
 139                     except:
 140                         note = pynotify.Notification(title, 'video downloaded', DEFAULT_ICON)
 141                 else:
 142                     # Generally, this will never happen, because the
 143                     # video is a new file.
 144                     note = pynotify.Notification(title, 'video downloaded', thumbnail)
 145                 note.show()
 146
 147
 148 def main():
 149     stations = fetch_stations(USER)
 150     if len(stations) == 0:
 151         print 'are you sure your pandora profile is public?'
 152     search_urls = fetch_tracks(stations)
 153     video_list = fetch_search_video_ids(search_urls)
 154     video_list = check_for_existing(video_list)
 155     fetch_videos(video_list)
 156
 157 if __name__ ==  "__main__":
 158     main()