[i_like_pandora.git] / likes_pandora.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = ("Dylan Lloyd <dylan@psu.edu>")
__license__ = "BSD"

# SETTINGS

USER = 'alphabethos'  # pandora account name: http://pandora.com/people/<USER>
DIR = '/home/dylan/pandora/'  # where to download the videos - will not be automatically created
DEFAULT_ICON = '/usr/share/icons/gnome/48x48/mimetypes/gnome-mime-application-x-shockwave-flash.png'  # for notifications
YT_DL = '/usr/bin/youtube-dl'  # path to youtube-dl
NOTIFICATIONS = True  # False
YT_OPT = '--no-progress --ignore-errors --continue --max-quality=22 -o "%(stitle)s---%(id)s.%(ext)s"'
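# Note: the output template names downloads "<title>---<video id>.<ext>"; the
# '---' separator and the id suffix are what fetch_videos() and
# check_for_existing() rely on below, so change it with care.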
# END OF SETTINGS

from BeautifulSoup import BeautifulSoup
import urllib
import urllib2
import os
import re
import shlex
import subprocess

if NOTIFICATIONS:
    import pynotify
    import hashlib
    import tempfile
    import string
    # libnotify requires an init call before notifications can be shown.
    pynotify.init('likes_pandora')

def fetch_stations(user):
    """ Takes a pandora username and returns a list of the station tokens the user is subscribed to. """
    stations = []
    page = urllib.urlopen('http://www.pandora.com/favorites/profile_tablerows_station.vm?webname=' + user)
    page = BeautifulSoup(page)
    table = page.findAll('div', attrs={'class': 'station_table_row'})
    for row in table:
        if row.find('a'):
            for attr, value in row.find('a').attrs:
                if attr == 'href':
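                    # The slice below assumes station links look like
                    # '/stations/<token>' (10 characters before the token);
                    # adjust the offset if Pandora's markup differs.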
                    stations.append(value[10:])
    return stations

def fetch_tracks(stations):
    """ Takes a list of station tokens and returns a list of 'title artist'
    search strings for the liked tracks on each station.
    """
    search_strings = []
    for station in stations:
        page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
        page = BeautifulSoup(page)
        titles = []
        artists = []
        for span in page.findAll('span', attrs={'class': 'track_title'}):
            for attr, value in span.attrs:
                if attr == 'tracktitle':
                    titles.append(value)
        for anchor in page.findAll('a'):
            artists.append(anchor.string)
        if len(titles) == len(artists):
            for i, title in enumerate(titles):
                search_strings.append(title + ' ' + artists[i])
        else:
            # Title and artist counts do not line up; skip this station
            # rather than pairing tracks with the wrong artists.
            pass
    return search_strings

def fetch_search_video_ids(search_strings):
    """ Takes a list of search strings, runs each as a youtube search, and
    returns a list of the first-result video ids.
    """
    video_list = []
    for search_string in search_strings:
        search_url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(search_string)
        page = urllib.urlopen(search_url)
        page = BeautifulSoup(page)
        result = page.find('div', attrs={'class': 'video-main-content'})
        if result is None:
            print 'odd feedback for search, could not find div at', search_url
            continue
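        # The first result div appears to have an id of the form
        # 'video-main-content-<video id>'; value[19:] strips that 19-character
        # prefix, and a valid youtube id is 11 characters long.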
        for attr, value in result.attrs:
            if attr == 'id' and len(value[19:]) == 11:
                video_list.append(value[19:])
            elif attr == 'id':
                print 'odd feedback for search', search_url, ':', value[19:]
    return video_list


def check_for_existing(video_list):
    """ Checks the download folder for files that already contain one of the
    video ids and removes those ids from video_list.
    """
    filelist = os.listdir(DIR)
    # Keep only the ids that do not appear in any existing filename; the
    # youtube-dl output template puts the id just before the extension.
    return [video for video in video_list
            if not any(video in filename for filename in filelist)]

def fetch_videos(videolist):
    """ Uses subprocess to download each video with youtube-dl, and triggers notifications if enabled. """
    os.chdir(DIR)
    args = shlex.split(YT_DL + ' ' + YT_OPT)
    if NOTIFICATIONS:
        regex = re.compile(r"\[download\] Destination: (.+)")
    for item in videolist:
        if item:
            thread = subprocess.Popen(args + ["http://youtube.com/watch?v=" + item], stdout=subprocess.PIPE)
            output = thread.stdout.read()
            if NOTIFICATIONS:
                video_file = regex.findall(output)
                if len(video_file) == 0:
                    # No "Destination" line in youtube-dl's output; skip the
                    # notification for this video but keep downloading.
                    continue
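                # The freedesktop Thumbnail Managing Standard names cached
                # thumbnails after the md5 of the file URI, which is what the
                # hash below reproduces.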
                thumbnail = hashlib.md5('file://' + DIR + video_file[0]).hexdigest() + '.png'
                # Two '/'s instead of three because the path is
                # absolute; I'm not sure how this'd work on windows.
                title, sep, vid_id = video_file[0].rpartition('---')
                title = string.replace(title, '_', ' ')
                thumbnail = os.path.join(os.path.expanduser('~/.thumbnails/normal'), thumbnail)
                if not os.path.isfile(thumbnail):
                    opener = urllib2.build_opener()
                    try:
                        page = opener.open('http://img.youtube.com/vi/' + item + '/1.jpg')
                        thumb = page.read()
                        # The thumbnail really should be saved to
                        # ~/.thumbnails/normal (Thumbnail Managing Standard)
                        # [http://jens.triq.net/thumbnail-spec/]
                        # As others have had problems anyway
                        # (http://mail.gnome.org/archives/gnome-list/2010-October/msg00009.html)
                        # I decided not to bother at the moment.
                        temp = tempfile.NamedTemporaryFile(suffix='.jpg')
                        temp.write(thumb)
                        temp.flush()
                        note = pynotify.Notification(title, 'video downloaded', temp.name)
                    except:
                        note = pynotify.Notification(title, 'video downloaded', DEFAULT_ICON)
                else:
                    # Generally, this will never happen, because the
                    # video is a new file.
                    note = pynotify.Notification(title, 'video downloaded', thumbnail)
                note.show()


def main():
    stations = fetch_stations(USER)
    if len(stations) == 0:
        print 'are you sure your pandora profile is public?'
    search_strings = fetch_tracks(stations)
    video_list = fetch_search_video_ids(search_strings)
    video_list = check_for_existing(video_list)
    fetch_videos(video_list)

if __name__ == "__main__":
    main()
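
# To run (after editing the settings at the top for your own account and
# paths), something like the following should work; a cron entry is one way
# to run it periodically:
#   $ python likes_pandora.py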