From d05619fab59d113dfb3937594e901ab7c4143cf9 Mon Sep 17 00:00:00 2001
From: Dylan Lloyd
Date: Fri, 7 Jan 2011 21:01:37 -0500
Subject: [PATCH] now downloading videos, does not check for existing files

---
Review notes (below the "---" marker, so not part of the commit message):
 * The line structure of this patch was restored from a copy in which all
   line breaks and indentation had been collapsed.  Four-space indentation
   and the position of the blank context lines in the first hunk are
   reconstructed; check them against parse.py before applying.
 * The string literals on the removed "print" line appear to be truncated
   in that copy, so this pre-image line may not match parse.py exactly.
 * Changes to the added code compared with the first version of the patch:
     fetch_search_video_ids  skips a page that has no 'video-main-content'
                             div instead of failing on result.attrs
     check_for_existing      takes the id list as an optional argument and
                             returns a filtered list (the first version
                             indexed the list with an id string and used
                             copy.deepcopy without importing copy)
     fetch_videos            initialises pynotify, waits for youtube-dl
                             with communicate(), uses "continue" instead
                             of "break" so one missing destination line no
                             longer stops the remaining downloads, and no
                             longer uses a bare "except:"
   The blob ids on the "index" line and the commit id above still refer
   to the first version.

 parse.py | 108 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 99 insertions(+), 9 deletions(-)

diff --git a/parse.py b/parse.py
index 4d386b6..900c461 100644
--- a/parse.py
+++ b/parse.py
@@ -4,10 +4,23 @@ __license__ = "BSD"
 
 # SETTINGS
 USER = 'alphabethos'
+DIR = '/home/dylan/pandora/'
+YT_DL = '/usr/bin/youtube-dl' # Path to youtube-dl
+NOTIFICATIONS = True
+DEFAULT_ICON = '/usr/share/icons/gnome/48x48/mimetypes/gnome-mime-application-x-shockwave-flash.png'
+YT_OPT = '--no-progress --ignore-errors --continue --max-quality=22 -o "%(stitle)s---%(id)s.%(ext)s"'
 # END OF SETTINGS
 
 import urllib
+import urllib2
 from BeautifulSoup import BeautifulSoup
+import pynotify
+import tempfile
+import string
+import hashlib
+import os
+import shlex, subprocess
+import re
 
 def fetch_stations(user):
     stations = []
@@ -22,12 +35,12 @@ def fetch_stations(user):
     return stations
 
 def fetch_tracks(stations):
+    search_urls = []
     for station in stations:
         page = urllib.urlopen('http://www.pandora.com/favorites/station_tablerows_thumb_up.vm?token=' + station + '&sort_col=thumbsUpDate')
         page = BeautifulSoup(page)
         titles = []
         artists = []
-        search_urls = []
         for span in page.findAll('span', attrs={'class':'track_title'}):
             for attr, value in span.attrs:
                 if attr == 'tracktitle':
@@ -37,27 +50,104 @@ def fetch_tracks(stations):
         if len(titles) == len(artists):
             i = 0
             for title in titles:
-                search_url = 'http://yt.com/results?search_query=' + urllib.quote_plus(title + ' ' + artists[i])
+                search_url = 'http://youtube.com/results?search_query=' + urllib.quote_plus(title + ' ' + artists[i])
                 search_urls.append(search_url)
-                print '' + title + ' by', artists[i], ''
                 i += 1
         else:
             pass ## ERROR
     return search_urls
 
-def fetch_videos(search_urls):
+def fetch_search_video_ids(search_urls):
+    """ Returns the video id scraped from each of the given search-result pages. """
+    video_list = []
     for url in search_urls:
         page = urllib.urlopen(url)
         page = BeautifulSoup(page)
-        result = page.find(attrs={'class':'yt-video-box'})
-        print result
-        for attr, value in result.contents[1]:
-            print value
+        result = page.find('div', attrs={'class':'video-main-content'})
+        if result is None:
+            # Nothing matched on this page; skip it rather than fail on
+            # result.attrs below.
+            print 'no result for url', url
+            continue
+        for attr, value in result.attrs:
+            # NOTE(review): 19 == len('video-main-content-'), presumably the
+            # prefix of the div id; 11 would be the length of a video id.
+            if attr == 'id' and len(value[19:]) == 11:
+                video_list.append(value[19:])
+            elif attr == 'id':
+                print 'odd feedback for url', url, " : ", value[19:]
+    return video_list
+
+
+def check_for_existing(videolist=None):
+    """ Returns the ids in videolist that no file name in DIR contains. """
+    if videolist is None:
+        # NOTE(review): get_video_ids() is not defined anywhere in this
+        # patch; callers should pass the list explicitly.
+        videolist = get_video_ids()
+    filelist = os.listdir(DIR)
+    # Build a new list instead of deleting entries while looping; ids are
+    # compared as plain substrings, not as regular expressions.
+    return [video for video in videolist
+            if not any(video in name for name in filelist)]
+
+def fetch_videos(videolist):
+    """ Uses subprocess to trigger a download using youtube-dl of the list created earlier. """
+    os.chdir(DIR)
+    args = shlex.split(YT_DL + ' ' + YT_OPT)
+    # libnotify has to be initialised before a notification can be shown.
+    notify = NOTIFICATIONS and pynotify.init('parse.py')
+    if notify:
+        regex = re.compile(r"\[download\] Destination: (.+)")
+    for item in videolist:
+        if item:
+            process = subprocess.Popen(args + [item], stdout=subprocess.PIPE)
+            # communicate() reads stdout to EOF and waits for the child.
+            output = process.communicate()[0]
+            if notify:
+                video_file = regex.findall(output)
+                if len(video_file) == 0:
+                    # No destination line for this video: skip only its
+                    # notification and carry on with the remaining ids.
+                    continue
+                thumbnail = hashlib.md5('file://' + DIR + video_file[0]).hexdigest() + '.png'
+                # Two '/'s instead of three because the path is
+                # absolute; I'm not sure how this'd work on windows.
+                title, sep, vid_id = video_file[0].rpartition('---')
+                title = title.replace('_', ' ')
+                thumbnail = os.path.join(os.path.expanduser('~/.thumbnails/normal'), thumbnail)
+                if not os.path.isfile(thumbnail):
+                    opener = urllib2.build_opener()
+                    try:
+                        page = opener.open('http://img.youtube.com/vi/' + item + '/1.jpg')
+                        thumb = page.read()
+                        # The thumbnail really should be saved to
+                        # ~/.thumbnails/normal (Thumbnail Managing
+                        # Standard)
+                        # [http://jens.triq.net/thumbnail-spec/]
+                        # As others have had problems anyway
+                        # (http://mail.gnome.org/archives/gnome-list/2010-October/msg00009.html)
+                        # I decided not to bother at the moment.
+                        temp = tempfile.NamedTemporaryFile(suffix='.jpg')
+                        temp.write(thumb)
+                        temp.flush()
+                        note = pynotify.Notification(title, 'video downloaded', temp.name)
+                    except Exception:
+                        # Best effort: fall back to the stock icon, but let
+                        # KeyboardInterrupt and SystemExit propagate.
+                        note = pynotify.Notification(title, 'video downloaded', DEFAULT_ICON)
+                else:
+                    # Generally, this will never happen, because the
+                    # video is a new file.
+                    note = pynotify.Notification(title, 'video downloaded', thumbnail)
+                note.show()
+
 
 def main():
     stations = fetch_stations(USER)
     search_urls = fetch_tracks(stations)
-    fetch_videos(search_urls)
+    video_list = fetch_search_video_ids(search_urls)
+    fetch_videos(video_list)
 
 if __name__ == "__main__":
     main()
-- 
2.30.2