From: Dylan Lloyd <dylan@psu.edu>
Date: Tue, 18 Jan 2011 19:23:17 +0000 (-0500)
Subject: Now checks for already downloaded files.
X-Git-Url: https://disinclined.org/git/?a=commitdiff_plain;h=4c13da9a190c4d9d3e3540c2993db83ac24e8b42;p=i_like_pandora.git

Now checks for already downloaded files.

Fixed crucial import error.
Added '#!/usr/bin/env python' to allow install.
Added additional documentation, including thoughts on future refactoring.
Added a rudimentary check for at least one station to exist, otherwise asks if the station is public, this should also explain how.
---

diff --git a/likes_pandora.py b/likes_pandora.py
old mode 100644
new mode 100755
index c787863..c302d7f
--- a/likes_pandora.py
+++ b/likes_pandora.py
@@ -1,27 +1,32 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
 __author__ = ("Dylan Lloyd <dylan@psu.edu>")
 __license__ = "BSD"
 
 # SETTINGS
 
-USER = 'alphabethos'
-DIR = '/home/dylan/pandora/'
+USER = 'alphabethos' # pandora account name http://pandora.com/people<USER>
+DIR = '/home/dylan/pandora/' # where to download the videos - will not be automatically created
 YT_DL = '/usr/bin/youtube-dl' # Path to youtube-dl
 NOTIFICATIONS = True # False
-DEFAULT_ICON ='/usr/share/icons/gnome/48x48/mimetypes/gnome-mime-application-x-shockwave-flash.png'
+DEFAULT_ICON ='/usr/share/icons/gnome/48x48/mimetypes/gnome-mime-application-x-shockwave-flash.png' # for notifications
 YT_OPT = '--no-progress --ignore-errors --continue --max-quality=22 -o "%(stitle)s---%(id)s.%(ext)s"'
 # END OF SETTINGS
 
 from BeautifulSoup import BeautifulSoup
 import urllib
 import urllib2
-import pynotify
-import tempfile
-import string
+import os
+import re
+import copy
+import shlex, subprocess
+
 if NOTIFICATIONS:
+    import pynotify
     import hashlib
-    import os
-    import shlex, subprocess
-    import re
+    import tempfile
+    import string
 
 def fetch_stations(user):
     """ This takes a pandora username and returns the a list of the station tokens that the user is subscribed to. """
@@ -40,7 +45,7 @@ def fetch_tracks(stations):
     """ Takes a list of station tokens and returns a list of youtube search urls.
         What this should really do is just return the Title + Artist strings.
     """
-    search_urls = []
+    search_strings = []
     for station in stations:
         page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
         page = BeautifulSoup(page)
@@ -55,39 +60,46 @@ def fetch_tracks(stations):
         if len(titles) == len(artists):
             i = 0
             for title in titles:
-                search_url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(title + ' ' + artists[i])
-                search_urls.append(search_url)
+                search_string = title + ' ' + artists[i]
+                search_strings.append(search_string)
                 i += 1
         else:
            pass  ## ERROR
-    return search_urls
+    return search_strings
 
-def fetch_search_video_ids(search_urls):
+def fetch_search_video_ids(search_strings):
     """ This takes a list of youtube search urls and tries to find the first result. It returns a list of youtube video ids.
         It really should take a list of ids instead.
     """
     video_list = []
-    for url in search_urls:
-        page = urllib.urlopen(url)
+    for search_string in search_strings:
+        search_url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(search_string)
+        page = urllib.urlopen(search_url)
         page = BeautifulSoup(page)
         result = page.find('div', attrs={'class':'video-main-content'})
+        print result
+        if result == None:
+            print 'odd feedback for search, could not find div at ', search_url
+            continue
         for attr, value in result.attrs:
             if attr == 'id' and len(value[19:]) == 11:
                 video_list.append(value[19:])
             elif attr == 'id':
-                print 'odd feedback for url', url, " : ", value[19:]
+                print 'odd feedback for search', search_url, " : ", value[19:]
     return video_list
 
 
-def check_for_existing():
-    """ Checks the download-folder for existing videos with same id and removes from videolist. """
-    videolist = get_video_ids()
+def check_for_existing(video_list):
+    """ Checks the download-folder for existing videos with same id and removes from video_list. """
     filelist = os.listdir(DIR)
-    for video in copy.deepcopy(videolist):
+    i = 0
+    for video in copy.deepcopy(video_list):
         for files in filelist:
             if re.search(video,files):
-                del videolist[video]
-    return videolist
+                del video_list[i]
+                i -= 1
+        i += 1
+    return video_list
 
 def fetch_videos(videolist):
     """ Uses subprocess to trigger a download using youtube-dl of the list created earlier, and triggers notifications if enabled. """
@@ -96,7 +108,7 @@ def fetch_videos(videolist):
     if NOTIFICATIONS: regex = re.compile("\[download\] Destination: (.+)")
     for item in videolist:
         if item:
-            thread = subprocess.Popen(args + [item], stdout=subprocess.PIPE)
+            thread = subprocess.Popen(args + ["http://youtube.com/watch?v=" + item], stdout=subprocess.PIPE)
             output = thread.stdout.read()
             if NOTIFICATIONS:
                 video_file = regex.findall(output)
@@ -135,8 +147,11 @@ def fetch_videos(videolist):
 
 def main():
     stations = fetch_stations(USER)
+    if len(stations) == 0:
+        print 'are you sure your pandora profile is public?'
     search_urls = fetch_tracks(stations)
     video_list = fetch_search_video_ids(search_urls)
+    video_list = check_for_existing(video_list)
     fetch_videos(video_list)
 
 if __name__ ==  "__main__":