likes_pandora.py

   1 __author__ = ("Dylan Lloyd <dylan@psu.edu>")
   2 __license__ = "BSD"
   3
   4 # SETTINGS
   5
   6 USER = 'alphabethos'
   7 DIR = '/home/dylan/pandora/'
   8 YT_DL = '/usr/bin/youtube-dl' # Path to youtube-dl
   9 NOTIFICATIONS = True
  10 DEFAULT_ICON ='/usr/share/icons/gnome/48x48/mimetypes/gnome-mime-application-x-shockwave-flash.png'
  11 YT_OPT = '--no-progress --ignore-errors --continue --max-quality=22 -o "%(stitle)s---%(id)s.%(ext)s"'
  12 # END OF SETTINGS
  13
  14 import urllib
  15 import urllib2
  16 from BeautifulSoup import BeautifulSoup
  17 import pynotify
  18 import tempfile
  19 import string
  20 import hashlib
  21 import os
  22 import shlex, subprocess
  23 import re
  24
  25 def fetch_stations(user):
  26     stations = []
  27     page = urllib.urlopen('http://www.pandora.com/favorites/profile_tablerows_station.vm?webname=' + USER)
  28     page = BeautifulSoup(page)
  29     table = page.findAll('div', attrs={'class':'station_table_row'})
  30     for row in table:
  31         if row.find('a'):
  32             for attr, value in row.find('a').attrs:
  33                 if attr == 'href':
  34                     stations.append(value[10:])
  35     return stations
  36
  37 def fetch_tracks(stations):
  38     search_urls = []
  39     for station in stations:
  40         page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
  41         page = BeautifulSoup(page)
  42         titles = []
  43         artists = []
  44         for span in page.findAll('span', attrs={'class':'track_title'}):
  45             for attr, value in span.attrs:
  46                 if attr == 'tracktitle':
  47                     titles.append(value)
  48         for anchor in page.findAll('a'):
  49             artists.append(anchor.string)
  50         if len(titles) == len(artists):
  51             i = 0
  52             for title in titles:
  53                 search_url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(title + ' ' + artists[i])
  54                 search_urls.append(search_url)
  55                 i += 1
  56         else:
  57            pass  ## ERROR
  58     return search_urls
  59
  60 def fetch_search_video_ids(search_urls):
  61     video_list = []
  62     for url in search_urls:
  63         page = urllib.urlopen(url)
  64         page = BeautifulSoup(page)
  65         result = page.find('div', attrs={'class':'video-main-content'})
  66         for attr, value in result.attrs:
  67             if attr == 'id' and len(value[19:]) == 11:
  68                 video_list.append(value[19:])
  69             elif attr == 'id':
  70                 print 'odd feedback for url', url, " : ", value[19:]
  71     return video_list
  72
  73
  74 def check_for_existing():
  75     """ Checks the download-folder for existing videos with same id and removes from videolist. """
  76     videolist = get_video_ids()
  77     filelist = os.listdir(DIR)
  78     for video in copy.deepcopy(videolist):
  79         for files in filelist:
  80             if re.search(video,files):
  81                 del videolist[video]
  82     return videolist
  83
  84 def fetch_videos(videolist):
  85     """ Uses subprocess to trigger a download using youtube-dl of the list created earlier. """
  86     os.chdir(DIR)
  87     args = shlex.split(YT_DL + ' ' + YT_OPT)
  88     if NOTIFICATIONS: regex = re.compile("\[download\] Destination: (.+)")
  89     for item in videolist:
  90         if item:
  91             thread = subprocess.Popen(args + [item], stdout=subprocess.PIPE)
  92             output = thread.stdout.read()
  93             if NOTIFICATIONS:
  94                 video_file = regex.findall(output)
  95                 if len(video_file) == 0:
  96                     break
  97                 thumbnail = hashlib.md5('file://' + DIR + video_file[0]).hexdigest() + '.png'
  98                 # Two '/'s instead of three because the path is
  99                 # absolute; I'm not sure how this'd work on windows.
 100                 title, sep, vid_id = video_file[0].rpartition('---')
 101                 title = string.replace(title, '_', ' ')
 102                 thumbnail = os.path.join(os.path.expanduser('~/.thumbnails/normal'), thumbnail)
 103                 if not os.path.isfile(thumbnail):
 104                     opener = urllib2.build_opener()
 105                     try:
 106                         page = opener.open('http://img.youtube.com/vi/' + item + '/1.jpg')
 107                         thumb = page.read()
 108                         # The thumbnail really should be saved to
 109                         # ~/.thumbnails/normal (Thumbnail Managing
 110                         # Standard)
 111                         # [http://jens.triq.net/thumbnail-spec/]
 112                         # As others have had problems anyway
 113                         # (http://mail.gnome.org/archives/gnome-list/2010-October/msg00009.html)
 114                         # I decided not to bother at the moment.
 115                         temp = tempfile.NamedTemporaryFile(suffix='.jpg')
 116                         temp.write(thumb)
 117                         temp.flush()
 118                         note = pynotify.Notification(title, 'video downloaded', temp.name)
 119                     except:
 120                         note = pynotify.Notification(title, 'video downloaded', DEFAULT_ICON)
 121                 else:
 122                     # Generally, this will never happen, because the
 123                     # video is a new file.
 124                     note = pynotify.Notification(title, 'video downloaded', thumbnail)
 125                 note.show()
 126
 127
 128 def main():
 129     stations = fetch_stations(USER)
 130     search_urls = fetch_tracks(stations)
 131     video_list = fetch_search_video_ids(search_urls)
 132     fetch_videos(video_list)
 133
 134 if __name__ ==  "__main__":
 135     main()