likes_pandora.py

   1 __author__ = ("Dylan Lloyd <dylan@psu.edu>")
   2 __license__ = "BSD"
   3
   4 # SETTINGS
   5
   6 USER = 'alphabethos'
   7 DIR = '/home/dylan/pandora/'
   8 YT_DL = '/usr/bin/youtube-dl' # Path to youtube-dl
   9 NOTIFICATIONS = True
  10 DEFAULT_ICON ='/usr/share/icons/gnome/48x48/mimetypes/gnome-mime-application-x-shockwave-flash.png'
  11 YT_OPT = '--no-progress --ignore-errors --continue --max-quality=22 -o "%(stitle)s---%(id)s.%(ext)s"'
  12 # END OF SETTINGS
  13
  14 import urllib
  15 import urllib2
  16 from BeautifulSoup import BeautifulSoup
  17 import pynotify
  18 import tempfile
  19 import string
  20 import hashlib
  21 import os
  22 import shlex, subprocess
  23 import re
  24
  25 def fetch_stations(user):
  26     """ This takes a pandora username and returns the a list of the station tokens that the user is subscribed to. """
  27     stations = []
  28     page = urllib.urlopen('http://www.pandora.com/favorites/profile_tablerows_station.vm?webname=' + USER)
  29     page = BeautifulSoup(page)
  30     table = page.findAll('div', attrs={'class':'station_table_row'})
  31     for row in table:
  32         if row.find('a'):
  33             for attr, value in row.find('a').attrs:
  34                 if attr == 'href':
  35                     stations.append(value[10:])
  36     return stations
  37
  38 def fetch_tracks(stations):
  39     """ Takes a list of station tokens and returns a list of youtube search urls.
  40         What this should really do is just return the Title + Artist strings.
  41     """
  42     search_urls = []
  43     for station in stations:
  44         page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
  45         page = BeautifulSoup(page)
  46         titles = []
  47         artists = []
  48         for span in page.findAll('span', attrs={'class':'track_title'}):
  49             for attr, value in span.attrs:
  50                 if attr == 'tracktitle':
  51                     titles.append(value)
  52         for anchor in page.findAll('a'):
  53             artists.append(anchor.string)
  54         if len(titles) == len(artists):
  55             i = 0
  56             for title in titles:
  57                 search_url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(title + ' ' + artists[i])
  58                 search_urls.append(search_url)
  59                 i += 1
  60         else:
  61            pass  ## ERROR
  62     return search_urls
  63
  64 def fetch_search_video_ids(search_urls):
  65     """ This takes a list of youtube search urls and tries to find the first result. It returns a list of youtube video ids.
  66         It really should take a list of ids instead.
  67     """
  68     video_list = []
  69     for url in search_urls:
  70         page = urllib.urlopen(url)
  71         page = BeautifulSoup(page)
  72         result = page.find('div', attrs={'class':'video-main-content'})
  73         for attr, value in result.attrs:
  74             if attr == 'id' and len(value[19:]) == 11:
  75                 video_list.append(value[19:])
  76             elif attr == 'id':
  77                 print 'odd feedback for url', url, " : ", value[19:]
  78     return video_list
  79
  80
  81 def check_for_existing():
  82     """ Checks the download-folder for existing videos with same id and removes from videolist. """
  83     videolist = get_video_ids()
  84     filelist = os.listdir(DIR)
  85     for video in copy.deepcopy(videolist):
  86         for files in filelist:
  87             if re.search(video,files):
  88                 del videolist[video]
  89     return videolist
  90
  91 def fetch_videos(videolist):
  92     """ Uses subprocess to trigger a download using youtube-dl of the list created earlier, and triggers notifications if enabled. """
  93     os.chdir(DIR)
  94     args = shlex.split(YT_DL + ' ' + YT_OPT)
  95     if NOTIFICATIONS: regex = re.compile("\[download\] Destination: (.+)")
  96     for item in videolist:
  97         if item:
  98             thread = subprocess.Popen(args + [item], stdout=subprocess.PIPE)
  99             output = thread.stdout.read()
 100             if NOTIFICATIONS:
 101                 video_file = regex.findall(output)
 102                 if len(video_file) == 0:
 103                     break
 104                 thumbnail = hashlib.md5('file://' + DIR + video_file[0]).hexdigest() + '.png'
 105                 # Two '/'s instead of three because the path is
 106                 # absolute; I'm not sure how this'd work on windows.
 107                 title, sep, vid_id = video_file[0].rpartition('---')
 108                 title = string.replace(title, '_', ' ')
 109                 thumbnail = os.path.join(os.path.expanduser('~/.thumbnails/normal'), thumbnail)
 110                 if not os.path.isfile(thumbnail):
 111                     opener = urllib2.build_opener()
 112                     try:
 113                         page = opener.open('http://img.youtube.com/vi/' + item + '/1.jpg')
 114                         thumb = page.read()
 115                         # The thumbnail really should be saved to
 116                         # ~/.thumbnails/normal (Thumbnail Managing
 117                         # Standard)
 118                         # [http://jens.triq.net/thumbnail-spec/]
 119                         # As others have had problems anyway
 120                         # (http://mail.gnome.org/archives/gnome-list/2010-October/msg00009.html)
 121                         # I decided not to bother at the moment.
 122                         temp = tempfile.NamedTemporaryFile(suffix='.jpg')
 123                         temp.write(thumb)
 124                         temp.flush()
 125                         note = pynotify.Notification(title, 'video downloaded', temp.name)
 126                     except:
 127                         note = pynotify.Notification(title, 'video downloaded', DEFAULT_ICON)
 128                 else:
 129                     # Generally, this will never happen, because the
 130                     # video is a new file.
 131                     note = pynotify.Notification(title, 'video downloaded', thumbnail)
 132                 note.show()
 133
 134
 135 def main():
 136     stations = fetch_stations(USER)
 137     search_urls = fetch_tracks(stations)
 138     video_list = fetch_search_video_ids(search_urls)
 139     fetch_videos(video_list)
 140
 141 if __name__ ==  "__main__":
 142     main()