works, but fails unpleasantly when no result found [dl.future/master] [master]
author: Dylan Lloyd <dylan@miniscule.localdomain>
Wed, 27 Jun 2012 05:57:39 +0000 (01:57 -0400)
committer: Dylan Lloyd <dylan@miniscule.localdomain>
Wed, 27 Jun 2012 05:57:39 +0000 (01:57 -0400)
patent.py [moved from search.py with 56% similarity]

similarity index 56%
rename from search.py
rename to patent.py
index d71f3ed..26a4c33 100755 (executable)
--- a/search.py
+++ b/patent.py
@@ -1,15 +1,27 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 
 import json
 import urllib2
 from BeautifulSoup import BeautifulSoup
 
+import cgi
+import cgitb
+cgitb.enable()
+
+import os
+from urlparse import urlparse
+import urllib
+
+
 patents = {}
+query = ''
 
 
 class Parse(object):
     def __init__(self):
-        response = urllib2.urlopen('http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&p=1&u=%2Fnetahtml%2FPTO%2Fsearch-bool.html&r=0&f=S&l=5&TERM1=duck&FIELD1=&co1=AND&TERM2=&FIELD2=&d=PTXT');
+        query =  urllib.quote(urlparse(os.environ['REQUEST_URI']).query[2:])
+        usptoURL = 'http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&p=1&u=%2Fnetahtml%2FPTO%2Fsearch-bool.html&r=0&f=S&l=5&TERM1=' + query + '&FIELD1=&co1=AND&TERM2=&FIELD2=&d=PTXT';
+        response = urllib2.urlopen(usptoURL);
         html = response.read()
         self.soup = BeautifulSoup(html)
         table = self.soup.findAll('table')[1]
@@ -23,7 +35,7 @@ class Parse(object):
                 if i == 1:
                     self.GUID = str(td.a.text)
                 elif i == 3:
-                    self.href = str(td.a['href'])
+                    self.href = "http://patft.uspto.gov" + str(td.a['href'])
                     self.description = str(td.text.replace('\n','').replace('\t',''))
                 i += 1
             patents[self.GUID] = {"href" : self.href, "description" : self.description};
@@ -31,6 +43,4 @@ class Parse(object):
 
 if __name__ == "__main__":
     parsed = Parse()
-    print json.dumps(patents)
-
-
+    print "Content-Type: application/json", "\n\n", json.dumps(patents)