Now checks for already downloaded files.
[i_like_pandora.git] / likes_pandora.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 __author__ = ("Dylan Lloyd <dylan@psu.edu>")
5 __license__ = "BSD"
6
# SETTINGS

# Pandora account name: http://pandora.com/people<USER>
USER = 'alphabethos'

# Where to download the videos - will not be automatically created.
DIR = '/home/dylan/pandora/'

# Path to the youtube-dl executable.
YT_DL = '/usr/bin/youtube-dl'

# Set to False to disable desktop notifications.
NOTIFICATIONS = True

# Icon shown in notifications when no video thumbnail is available.
DEFAULT_ICON = '/usr/share/icons/gnome/48x48/mimetypes/gnome-mime-application-x-shockwave-flash.png'

# Command-line options handed to youtube-dl.  The "---" in the output
# template separates the title from the video id in the file name.
YT_OPT = (
    '--no-progress --ignore-errors --continue --max-quality=22 '
    '-o "%(stitle)s---%(id)s.%(ext)s"'
)

# END OF SETTINGS
16
17 from BeautifulSoup import BeautifulSoup
18 import urllib
19 import urllib2
20 import os
21 import re
22 import copy
23 import shlex, subprocess
24
25 if NOTIFICATIONS:
26 import pynotify
27 import hashlib
28 import tempfile
29 import string
30
def fetch_stations(user):
    """Return the station tokens listed on a Pandora user's profile page.

    Args:
        user: Pandora account (web) name whose stations are looked up.

    Returns:
        A list with one entry for every station row that contains a link:
        the link's ``href`` with its first 10 characters removed.  The list
        is empty when the page contains no station rows.
    """
    # Query the profile of the requested user.  This previously always used
    # the module-level USER setting and ignored the argument.
    page = urllib.urlopen('http://www.pandora.com/favorites/profile_tablerows_station.vm?webname=' + user)
    soup = BeautifulSoup(page)

    stations = []
    for row in soup.findAll('div', attrs={'class': 'station_table_row'}):
        link = row.find('a')
        if not link:
            continue
        for attr, value in link.attrs:
            if attr == 'href':
                # Only what follows the first 10 characters is kept;
                # presumably that prefix is the fixed path in front of the
                # station token -- TODO confirm against the live markup.
                stations.append(value[10:])
    return stations
43
def fetch_tracks(stations):
    """Build "<title> <artist>" search strings for the thumbed-up tracks.

    For every station token the thumbs-up listing is fetched.  Track titles
    are read from the ``tracktitle`` attribute of ``span.track_title``
    elements and artists from the text of every ``<a>`` element on the
    page; the two lists are paired up by position.

    Args:
        stations: Iterable of station tokens (see ``fetch_stations``).

    Returns:
        A list of search strings, one per track.  Stations whose title and
        artist counts differ are reported on stdout and contribute nothing.
    """
    search_strings = []
    for station in stations:
        page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
        soup = BeautifulSoup(page)

        titles = []
        for span in soup.findAll('span', attrs={'class': 'track_title'}):
            for attr, value in span.attrs:
                if attr == 'tracktitle':
                    titles.append(value)
        artists = [anchor.string for anchor in soup.findAll('a')]

        if len(titles) != len(artists):
            # Positional pairing is meaningless when the counts differ, so
            # say so instead of silently dropping the whole station.
            print('could not pair titles with artists for station %s' % station)
            continue

        for title, artist in zip(titles, artists):
            # BeautifulSoup's .string is None when the tag does not wrap a
            # single string; fall back to the bare title rather than
            # crashing on str + None.
            if artist is None:
                search_strings.append(title)
            else:
                search_strings.append(title + ' ' + artist)
    return search_strings
69
def fetch_search_video_ids(search_strings):
    """Look up each search string on YouTube and collect the first hit's id.

    For every search string the YouTube results page is fetched and the
    first ``div`` with class ``video-main-content`` is inspected.  The video
    id is taken to be that div's ``id`` attribute with its first 19
    characters removed, and is accepted only when it is exactly 11
    characters long.

    Args:
        search_strings: Iterable of free-text queries ("<title> <artist>").

    Returns:
        A list of video ids.  Searches that yield no usable result are
        reported on stdout and skipped.
    """
    video_list = []
    for search_string in search_strings:
        if not isinstance(search_string, bytes):
            # Python 2's urllib.quote_plus() raises on non-ASCII unicode
            # input, so hand it UTF-8 encoded bytes instead.
            search_string = search_string.encode('utf-8')
        search_url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(search_string)
        soup = BeautifulSoup(urllib.urlopen(search_url))

        result = soup.find('div', attrs={'class': 'video-main-content'})
        if result is None:
            print('odd feedback for search, could not find div at  %s' % search_url)
            continue

        for attr, value in result.attrs:
            if attr != 'id':
                continue
            video_id = value[19:]
            if len(video_id) == 11:
                video_list.append(video_id)
            else:
                print('odd feedback for search %s  :  %s' % (search_url, video_id))
    return video_list
90
91
def check_for_existing(video_list, directory=None):
    """Drop the ids of videos that are already present in the download folder.

    A video counts as present when its id occurs anywhere in the name of
    at least one entry of the folder.

    Args:
        video_list: List of video ids.  It is filtered in place.
        directory: Folder to scan.  Defaults to the module-level ``DIR``
            setting when omitted.

    Returns:
        The same ``video_list`` object, now holding only the ids for which
        no matching file name was found (original order preserved).

    Raises:
        OSError: If the folder cannot be listed.
    """
    if directory is None:
        directory = DIR
    existing = os.listdir(directory)

    # Rebuild the contents in one pass.  Deleting by a running index went
    # wrong as soon as an id matched more than one file (for example a
    # finished download plus a leftover ".part"), removing unrelated ids.
    video_list[:] = [
        video for video in video_list
        if not any(video in name for name in existing)
    ]
    return video_list
103
def fetch_videos(videolist):
    """Download every video with youtube-dl and optionally notify the user.

    Changes the working directory to ``DIR`` and runs youtube-dl (``YT_DL``
    with ``YT_OPT``) once per video id.  When ``NOTIFICATIONS`` is enabled,
    a desktop notification is shown for each download whose destination
    file name could be read from youtube-dl's output.

    Args:
        videolist: Iterable of YouTube video ids; falsy entries are skipped.

    Returns:
        None.

    Raises:
        OSError: If ``DIR`` cannot be entered or youtube-dl cannot be started.
    """
    # NOTE(review): nothing in this function calls pynotify.init(); confirm
    # that the library is initialised before notifications are created.
    os.chdir(DIR)
    args = shlex.split(YT_DL + ' ' + YT_OPT)
    destination_re = re.compile(r"\[download\] Destination: (.+)")

    for item in videolist:
        if not item:
            continue

        proc = subprocess.Popen(args + ["http://youtube.com/watch?v=" + item], stdout=subprocess.PIPE)
        # communicate() reads all of stdout and also waits for the process,
        # so finished downloads do not linger as zombie processes.
        output = proc.communicate()[0]

        if not NOTIFICATIONS:
            continue

        video_file = destination_re.findall(output)
        if not video_file:
            # youtube-dl reported no destination for this video.  Skip the
            # notification but keep going: a `break` here used to abort
            # every remaining download.
            continue

        # Two '/'s instead of three because the path is
        # absolute; I'm not sure how this'd work on windows.
        thumbnail = hashlib.md5('file://' + DIR + video_file[0]).hexdigest() + '.png'
        thumbnail = os.path.join(os.path.expanduser('~/.thumbnails/normal'), thumbnail)

        # File names look like "<title>---<id>.<ext>" (see YT_OPT); the
        # title uses underscores where the notification should show spaces.
        title = video_file[0].rpartition('---')[0].replace('_', ' ')

        if os.path.isfile(thumbnail):
            # Generally, this will never happen, because the
            # video is a new file.
            icon = thumbnail
        else:
            try:
                page = urllib2.build_opener().open('http://img.youtube.com/vi/' + item + '/1.jpg')
                thumb = page.read()
                # The thumbnail really should be saved to
                # ~/.thumbnails/normal (Thumbnail Managing Standard)
                # [http://jens.triq.net/thumbnail-spec/]
                # As others have had problems anyway
                # (http://mail.gnome.org/archives/gnome-list/2010-October/msg00009.html)
                # it is only written to a temporary file for now.  The
                # file disappears once `temp` is closed, so the reference
                # is kept alive until the notification has been shown.
                temp = tempfile.NamedTemporaryFile(suffix='.jpg')
                temp.write(thumb)
                temp.flush()
                icon = temp.name
            except Exception:
                # Best effort: any problem fetching or storing the
                # thumbnail falls back to the stock icon.  Unlike a bare
                # `except:`, this lets Ctrl-C still interrupt the script.
                icon = DEFAULT_ICON

        note = pynotify.Notification(title, 'video downloaded', icon)
        note.show()
146
147
def main():
    """Run the whole pipeline for the configured Pandora user.

    Stations -> liked tracks -> YouTube video ids -> ids not yet on disk
    -> downloads.  A hint is printed when no stations were found; the
    remaining steps still run on the (empty) result.
    """
    stations = fetch_stations(USER)
    if not stations:
        print('are you sure your pandora profile is public?')

    search_strings = fetch_tracks(stations)
    found_ids = fetch_search_video_ids(search_strings)
    missing_ids = check_for_existing(found_ids)
    fetch_videos(missing_ids)


if __name__ == "__main__":
    main()