[i_like_pandora.git] / likes_pandora.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = ("Dylan Lloyd <dylan@psu.edu>")
__license__ = "BSD"

# SETTINGS

USER = 'alphabethos'  # pandora account name: http://pandora.com/people/<USER>
DIR = '/home/dylan/pandora/'  # where to download the videos - will not be automatically created
DEFAULT_ICON = '/usr/share/icons/gnome/48x48/mimetypes/gnome-mime-application-x-shockwave-flash.png'  # for notifications
YT_DL = '/usr/bin/youtube-dl'  # path to youtube-dl
NOTIFICATIONS = True  # False
YT_OPT = '--no-progress --ignore-errors --continue --max-quality=22 -o "%(stitle)s---%(id)s.%(ext)s"'
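# Note: the output template names downloads "<title>---<video id>.<ext>"; the
# '---' separator and the id suffix are what fetch_videos() and
# check_for_existing() rely on below, so change it with care.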
# END OF SETTINGS

from BeautifulSoup import BeautifulSoup
import urllib
import urllib2
import os
import re
import shlex
import subprocess

if NOTIFICATIONS:
    import pynotify
    import hashlib
    import tempfile
    import string
    # libnotify requires an init call before notifications can be shown.
    pynotify.init('likes_pandora')

def fetch_stations(user):
    """ Takes a pandora username and returns a list of the station tokens the user is subscribed to. """
    stations = []
    page = urllib.urlopen('http://www.pandora.com/favorites/profile_tablerows_station.vm?webname=' + user)
    page = BeautifulSoup(page)
    table = page.findAll('div', attrs={'class': 'station_table_row'})
    for row in table:
        if row.find('a'):
            for attr, value in row.find('a').attrs:
                if attr == 'href':
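                    # The slice below assumes station links look like
                    # '/stations/<token>' (10 characters before the token);
                    # adjust the offset if Pandora's markup differs.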
                    stations.append(value[10:])
    return stations

def fetch_tracks(stations):
    """ Takes a list of station tokens and returns a list of 'title artist'
    search strings for the liked tracks on each station.
    """
    search_strings = []
    for station in stations:
        page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
        page = BeautifulSoup(page)
        titles = []
        artists = []
        for span in page.findAll('span', attrs={'class': 'track_title'}):
            for attr, value in span.attrs:
                if attr == 'tracktitle':
                    titles.append(value)
        for anchor in page.findAll('a'):
            artists.append(anchor.string)
        if len(titles) == len(artists):
            for i, title in enumerate(titles):
                search_strings.append(title + ' ' + artists[i])
        else:
            # Title and artist counts do not line up; skip this station
            # rather than pairing tracks with the wrong artists.
            pass
    return search_strings

def fetch_search_video_ids(search_strings):
    """ Takes a list of search strings, runs each as a youtube search, and
    returns a list of the first-result video ids.
    """
    video_list = []
    for search_string in search_strings:
        search_url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(search_string)
        page = urllib.urlopen(search_url)
        page = BeautifulSoup(page)
        result = page.find('div', attrs={'class': 'video-main-content'})
        if result is None:
            print 'odd feedback for search, could not find div at', search_url
            continue
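        # The first result div appears to have an id of the form
        # 'video-main-content-<video id>'; value[19:] strips that 19-character
        # prefix, and a valid youtube id is 11 characters long.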
        for attr, value in result.attrs:
            if attr == 'id' and len(value[19:]) == 11:
                video_list.append(value[19:])
            elif attr == 'id':
                print 'odd feedback for search', search_url, ':', value[19:]
    return video_list


def check_for_existing(video_list):
    """ Checks the download folder for files that already contain one of the
    video ids and removes those ids from video_list.
    """
    filelist = os.listdir(DIR)
    # Keep only the ids that do not appear in any existing filename; the
    # youtube-dl output template puts the id just before the extension.
    return [video for video in video_list
            if not any(video in filename for filename in filelist)]

def fetch_videos(videolist):
    """ Uses subprocess to download each video with youtube-dl, and triggers notifications if enabled. """
    os.chdir(DIR)
    args = shlex.split(YT_DL + ' ' + YT_OPT)
    if NOTIFICATIONS:
        regex = re.compile(r"\[download\] Destination: (.+)")
    for item in videolist:
        if item:
            thread = subprocess.Popen(args + ["http://youtube.com/watch?v=" + item], stdout=subprocess.PIPE)
            output = thread.stdout.read()
            if NOTIFICATIONS:
                video_file = regex.findall(output)
                if len(video_file) == 0:
                    # No "Destination" line in youtube-dl's output; skip the
                    # notification for this video but keep downloading.
                    continue
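                # The freedesktop Thumbnail Managing Standard names cached
                # thumbnails after the md5 of the file URI, which is what the
                # hash below reproduces.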
                thumbnail = hashlib.md5('file://' + DIR + video_file[0]).hexdigest() + '.png'
                # Two '/'s instead of three because the path is
                # absolute; I'm not sure how this'd work on windows.
                title, sep, vid_id = video_file[0].rpartition('---')
                title = string.replace(title, '_', ' ')
                thumbnail = os.path.join(os.path.expanduser('~/.thumbnails/normal'), thumbnail)
                if not os.path.isfile(thumbnail):
                    opener = urllib2.build_opener()
                    try:
                        page = opener.open('http://img.youtube.com/vi/' + item + '/1.jpg')
                        thumb = page.read()
                        # The thumbnail really should be saved to
                        # ~/.thumbnails/normal (Thumbnail Managing Standard)
                        # [http://jens.triq.net/thumbnail-spec/]
                        # As others have had problems anyway
                        # (http://mail.gnome.org/archives/gnome-list/2010-October/msg00009.html)
                        # I decided not to bother at the moment.
                        temp = tempfile.NamedTemporaryFile(suffix='.jpg')
                        temp.write(thumb)
                        temp.flush()
                        note = pynotify.Notification(title, 'video downloaded', temp.name)
                    except:
                        note = pynotify.Notification(title, 'video downloaded', DEFAULT_ICON)
                else:
                    # Generally, this will never happen, because the
                    # video is a new file.
                    note = pynotify.Notification(title, 'video downloaded', thumbnail)
                note.show()


def main():
    stations = fetch_stations(USER)
    if len(stations) == 0:
        print 'are you sure your pandora profile is public?'
    search_strings = fetch_tracks(stations)
    video_list = fetch_search_video_ids(search_strings)
    video_list = check_for_existing(video_list)
    fetch_videos(video_list)

if __name__ == "__main__":
    main()
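
# To run (after editing the settings at the top for your own account and
# paths), something like the following should work; a cron entry is one way
# to run it periodically:
#   $ python likes_pandora.py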