urllib
urllib.urlopen(URL) does what your browser would do if you gave it the URL
import urllib
instream = urllib.urlopen("http://www.third-bit.com/greeting.html")
lines = instream.readlines()
instream.close()
for line in lines:
print line,
readlines wouldn't do the right thing if the thing being read was an image
Use read to grab the bytes in that case
$ python spider.py http://www.google.ca
http://groups.google.ca/grphp?hl=en&tab=wg&ie=UTF-8
http://news.google.ca/nwshp?hl=en&tab=wn&ie=UTF-8
http://scholar.google.com/schhp?hl=en&tab=ws&ie=UTF-8
http://www.google.ca/fr
import sys, urllib, re
url = sys.argv[1]
instream = urllib.urlopen(url)
page = instream.read()
instream.close()
links = re.findall(r'href=\"[^\"]+\"', page)
temp = set()
for x in links:
x = x[6:-1] # strip off 'href="' and '"'
if x.startswith('http://'):
temp.add(x)
links = list(temp)
links.sort()
for x in links:
print x
http://www.google.ca?q=Python searches for pages related to Python
"?" separates the parameters from the rest of the URL
"&" separates the parameters from each other
http://www.google.ca/search?q=Python&client=firefox
What if you need "?" or "&" in a parameter?
Encode special characters as "%" followed by a 2-digit hexadecimal code
Encode a space as "+"
| Character | Encoding |
|---|---|
"#"
|
%23
|
"$"
|
%24
|
"%"
|
%25
|
"&"
|
%26
|
"+"
|
%2B
|
","
|
%2C
|
"/"
|
%2F
|
":"
|
%3A
|
";"
|
%3B
|
"="
|
%3D
|
"?"
|
%3F
|
"@"
|
%40
|
| Table 3: URL Encoding | |
http://www.google.ca/search?q=grade+%3D+A%2B
urllib has functions to make this easy
urllib.quote(str) replaces special characters in str with escape sequences
urllib.unquote(str) replaces escape sequences with characters
urllib.urlencode(params) takes a dictionary and constructs the entire query parameter string
import urllib
print urllib.urlencode({'surname' : 'Von Neumann', 'forename' : 'John'})
surname=Von+Neumann&forename=John
Figure 5: Web Services
PyAmazon
turns parameters into a URL, and converts the XML reply into Python objects
import sys, amazon
# Format multiple authors' names nicely.
def prettyName(arg):
    """Return a display string for one author name or a sequence of names.

    A value that is not a list or tuple is returned unchanged.  A list or
    tuple is joined as 'A', 'A and B' or 'A, B and C'; an empty sequence
    gives ''.
    """
    if not isinstance(arg, (list, tuple)):
        return arg
    if len(arg) < 2:
        # One name must not get a stray leading ' and '; no names must not
        # raise IndexError on arg[-1].
        return arg[0] if arg else ''
    return ', '.join(arg[:-1]) + ' and ' + arg[-1]
if __name__ == '__main__':
# Get information.
key, asin = sys.argv[1], sys.argv[2]
amazon.setLicense(key)
items = amazon.searchByASIN(asin)
# Handle errors.
if not items:
print 'Nothing found for', asin
if len(items) > 1:
print len(items), 'items found for', asin
# Display information.
item = items[0]
productName = item.ProductName
ourPrice = item.OurPrice
authors = prettyName(item.Authors.Author)
print '%s: %s (%s)' % (authors, productName, ourPrice)
$ python findbook.py 123ABCDEFGHIJKL4MN56 0974514071
Greg Wilson: Data Crunching : Solve Everyday Problems Using Java, Python, and more. ($18.87)
Figure 5: CGI Data Processing Cycle
| Name | Purpose | Example |
|---|---|---|
REQUEST_METHOD
|
What kind of HTTP request is being handled |
GET or POST
|
SCRIPT_NAME
|
The path to the script that's executing |
/cgi-bin/post_photo.py
|
QUERY_STRING
|
The query parameters following "?" in the URL |
name=mydog.jpg&expires=never
|
CONTENT_TYPE
|
The type of any extra data being sent with the request |
image/jpeg
|
CONTENT_LENGTH
|
How much extra data is being sent with the request (in bytes) |
17290
|
| Table 4: Important CGI Environment Variables | ||
The server sends CONTENT_LENGTH bytes to the CGI on standard input
Use the "Content-Type" header to specify the MIME type of the data being sent
| Family | Specific Type | Describes |
|---|---|---|
| Text |
text/html
|
Web pages |
| Image |
image/jpeg
|
JPEG-format image |
| Audio |
audio/x-mp3
|
MP3 audio file |
| Video |
video/quicktime
|
Apple Quicktime video format |
| Application-specific data |
application/pdf
|
Adobe PDF document |
| Table 5: Example Mime Types | ||
Content-Type header to tell the client to expect HTML…
#!/usr/bin/env python

# Headers and an extra blank line
print 'Content-type: text/html'
print

# Body
print '<html><body><p>Hello, CGI!</p></body></html>'
http://plato.cgl.ucsf.edu/bmi280/cgi-bin/hello_cgi.py
cgi-bin directory
Figure 6: Basic CGI Output
#!/usr/bin/env python
import os, cgi
# Headers and an extra blank line
print 'Content-type: text/html'
print
# Body
print '<html><body>'
keys = os.environ.keys()
keys.sort()
for k in keys:
print '<p>%s: %s</p>' % (cgi.escape(k), cgi.escape(os.environ[k]))
print '</body></html>'
Figure 7: Environment Variable Output
Figure 8: A Simple Form
<html>
  <body>
    <!-- Submits the sequence, the search type and the chosen programs
         to print_params.py. -->
    <form action="/bmi280/cgi-bin/print_params.py">
      <p>Sequence: <input type="text" name="sequence"/>
        Search type:
        <select name="match">
          <option>Exact match</option>
          <option>Similarity match</option>
          <option>Sub-match</option>
        </select>
      </p>
      <p>Programs:
        <!-- <input> is an empty element and cannot contain its caption,
             so each checkbox is tied to its text with a <label>. -->
        <label><input type="checkbox" name="frog"/> FROG (version 1.1)</label>
        <label><input type="checkbox" name="frog2"/> FROG (2.0 beta)</label>
        <label><input type="checkbox" name="bayeshart"/> Bayes-Hart</label>
      </p>
      <p>
        <input type="submit" value="Submit Query"/>
        <input type="reset" value="Reset"/>
      </p>
    </form>
  </body>
</html>
<input/> element has a name attributeos.environ['REQUEST_METHOD']: "POST"
os.environ['SCRIPT_NAME']: "/cgi-bin/simple_form.py"
os.environ['CONTENT_TYPE']: "application/x-www-form-urlencoded"
os.environ['CONTENT_LENGTH']: "80"
sequence=GATTACA&search_type=Similarity+match&program=FROG-11&program=Bayes-Hart
cgi module insteadFieldStorage
FieldStorage object is created, it reads and stores information contained in the URL and environmentsys.stdin
#!/usr/bin/env python
import cgi
print 'Content-type: text/html'
print
print '<html><body>'
form = cgi.FieldStorage()
for key in form.keys():
value = form.getvalue(key)
if isinstance(value, list):
value = '[' + ', '.join(value) + ']'
print '<p>%s: %s</p>' % (cgi.escape(key), cgi.escape(value))
print '</body></html>'
| URL | Value of a
|
Value of b
|
|---|---|---|
http://www.third-bit.com/swc/show_params.py?a=0
|
"0"
|
None |
http://www.third-bit.com/swc/show_params.py?a=0&b=hello
|
"0"
|
"hello"
|
http://www.third-bit.com/swc/show_params.py?a=0&b=hello&a=22
|
["0", "22"]
|
"hello"
|
| Table 6: Example Parameter Values | ||
Add import cgitb; cgitb.enable() to the top of the program
cgitb is the CGI traceback module
Checking whether a FieldStorage value is a string or a list is tedious
Use FieldStorage.getfirst(name) to get the unique value
FieldStorage.getlist(name) always returns a list of values for name
Figure 9: Three Tier Architecture
Hi, is anyone reading this site? I was wondering the same thing. I wasn't sure if we were supposed to post here. Good point. Is there way to delete messages?
If newmessage is there, append it, and display results
If newmessage isn't there, someone's visiting the page, rather than submitting the form
# Get existing messages.
# Read the stored messages, one per line, without trailing whitespace.
infile = open('messages.txt', 'r')
try:
    lines = [x.rstrip() for x in infile.readlines()]
finally:
    infile.close()

# Add more data?
form = cgi.FieldStorage()
if 'newmessage' in form:
    # The form was submitted: append the new message and rewrite the file.
    # NOTE: nothing stops two requests from doing this at the same time;
    # the file is read and rewritten without any locking.
    lines.append(form.getfirst('newmessage'))
    outfile = open('messages.txt', 'w')
    try:
        for line in lines:
            print >> outfile, line
    finally:
        outfile.close()
XMLHttpRequest()
// Handle the XMLHttpRequest
// Sends the SQL text to the server-side script.  The request is
// asynchronous; getData() is called on every state change.
function sendRequest(sql) {
  xmlhttp = new XMLHttpRequest();
  if (xmlhttp != null) {
    xmlhttp.onreadystatechange = getData; // getData is our callback method
    // Percent-encode the statement: raw SQL contains spaces, quotes and "=",
    // and possibly "&" or "%", any of which would corrupt the query string.
    xmlhttp.open("GET", "/cgi-bin/getBmi280Table.py?sql=" + encodeURIComponent(sql), true);
    xmlhttp.send(null);
  }
}

// This method gets called whenever the object state changes.
function getData() {
  // Are we complete?
  if (xmlhttp.readyState != 4) {
    return;
  }
  // Do we have a good http status?
  if (xmlhttp.status != 200) {
    alert("Unable to contact AJAX server: " + xmlhttp.status);
    return;
  }
  // responseXML holds the XML document, which we can address using the DOM.
  // If we only wanted the raw text, we could get xmlhttp.responseText.
  var response = xmlhttp.responseXML;

  // Use the DOM to get the results table from the server.
  var newChild = response ? response.getElementById("results_table") : null;
  if (newChild == null) {
    // Passing null to appendChild would throw, so report the problem instead.
    alert("No results table in the server response");
    return;
  }

  // Get a handle on the results div and add in our results table.
  var tableDiv = document.getElementById("results_div");
  tableDiv.appendChild(newChild);
}
| Method | Description |
abort() | Cancels the current request |
getAllResponseHeaders() |
Returns the complete set of http headers as a string |
getResponseHeader("headername") |
Returns the value of the specified http header |
open("method","URL",async,"username","password") |
Specifies the method, URL, and other optional attributes of a request
The method parameter can have a value of "GET", "POST", or "PUT" (use "GET" when requesting data and use "POST" when sending data, especially if the length of the data is greater than 512 bytes). The URL parameter may be either a relative or complete URL. The async parameter specifies whether the request should be handled asynchronously or not. true means that script processing carries on after the send() method, without waiting for a response. false means that the script waits for a response before continuing script processing. |
send(content) |
Sends the request |
setRequestHeader("label", "value") |
Adds a label/value pair to the http header to be sent |
| Table 7: XMLHttpRequest Methods | |
|---|---|
| Property | Description |
onreadystatechange |
An event handler for an event that fires at every state change |
readyState |
Returns the state of the object:
0 = uninitialized, 1 = loading, 2 = loaded, 3 = interactive, 4 = complete |
responseText |
Returns the response as a string |
responseXML |
Returns the response as XML. This property returns an XML document object, which can be examined and parsed using W3C DOM node tree methods and properties |
status |
Returns the HTTP status as a number (e.g. 404 for "Not Found" or 200 for "OK") |
statusText |
Returns the HTTP status as a string (e.g. "Not Found" or "OK") |
| Table 8: XMLHttpRequest Properties | |
|---|---|
cgi moduleXMLHttpRequest.responseXML:#! /usr/bin/python import cgi import sys print "Content-type: text/xml" print ""# We want this to be interpreted as HTML by the client print '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' print '<html xmlns="http://www.w3.org/1999/xhtml">'
bmi280/bmi280.svg: the XHTML+SVG file that makes up the front-endbmi280/css/bmi280.css: the stylesheet for both the XHTML and SVGbmi280/js/bmi280.js: the JavaScript that drives the applicationcgi-bin/getBmi280Table.py: the server-side component<?xml version="1.0" encoding="UTF-8"?> <html xmlns="http://www.w3.org/1999/xhtml" xmlns:svg="http://www.w3.org/2000/svg" xml:lang="en" lang="en"> <head> <script type="text/javascript" src="js/bmi280.js"></script> </head> <link rel="stylesheet" type="text/css" href="css/bmi280.css"/> <body> <h3>BMI280 - AJAX Example</h3> <svg:svg id="svg-root" width="100%" viewBox="0 0 800 100" version="1.1" ><!-- Surrounding Rectangle --> <svg:rect x="0" y="0" width="800" height="100" style="stroke: blue; fill: none;"/><!-- Recipe Entity --> <svg:rect x="40" y="30" width="60" height="40" class="entity" onclick="showInput('recipe_input', this);"/> <svg:text x="50" y="52" class="label1">Recipe</svg:text> <svg:line x1="100" y1="50" x2="330" y2="50" stroke="yellow" stroke-width="2"/><!-- Fragment Entity --> <svg:rect x="330" y="30" width="60" height="40" class="entity" onclick="showInput('fragment_input', this);"/> <svg:text x="334" y="52" class="label1">Fragment</svg:text> <svg:line x1="390" y1="50" x2="630" y2="50" stroke="yellow" stroke-width="2"/><!-- Gene Entity --> <svg:rect x="630" y="30" width="60" height="40" class="entity" onclick="showInput('gene_input', this);"/> <svg:text x="647" y="52" class="label1">Gene</svg:text><!-- Produces relationship --> <svg:rect x="200" y="30" width="40" height="40" class="relationship" transform="rotate(-45,220,50)" onclick="showInput('recipe_input_join', this);"/> <svg:text x="201" y="52" class="label2">Produces</svg:text><!-- Contains relationship --> <svg:rect x="500" y="30" width="40" height="40" class="relationship" transform="rotate(-45,520,50)" onclick="showInput('gene_input_join', this);"/> <svg:text x="501" y="52" class="label2">Contains</svg:text><!-- Links and orders --> </svg:svg><!-- This is 
the form: Note that each <span> has an ID and a class that we will use to control whether we show the containing input field or not. Also note specifically the way we call getTable with the arguments we want. --> <form> <span id="recipe_input" class="hidden"> Recipe Name: <input type="text" onchange="getTable('RECIPE','RECIPE.NAME', this, 'Name,File,Owner',null);"/> </span> <span id="recipe_input_join" class="hidden"> Recipe Name: <input type="text" onchange="getTable('RECIPE,PRODUCES,FRAG','RECIPE.NAME', this, 'RECIPE.Name,RECIPE.Owner,PRODUCES.Date,FRAG.Name,FRAG.Sequence','RECIPE.RCP=PRODUCES.RCP and PRODUCES.FRAG=FRAG.FRAG');"/> </span> <span id="fragment_input" class="hidden" style="position: absolute; left: 35%;"> Fragment Name: <input type="text" onchange="getTable('FRAG','FRAG.NAME', this, 'Name,Sequence,Circular',null);"/> </span> <span id="gene_input_join" class="hidden"> Gene Name: <input type="text" onchange="getTable('FRAG,CONTAINS,GENE','GENE.NAME', this, 'FRAG.Name,FRAG.Sequence,GENE.Name,CONTAINS.Start,CONTAINS.End','FRAG.FRAG=CONTAINS.FRAG and GENE.ID=CONTAINS.GENE');"/> </span> <span id="gene_input" class="hidden" style="position: absolute; left: 70%;"> Gene Name: <input type="text" onchange="getTable('GENE','GENE.NAME', this, 'Name,Protein,StartNum',null);"/> </span> </form><!-- We'll write a header into this <h3> when we get the data --> <h3 id="table_header" class="table_header"> </h3><!-- We'll write the results table into this when we get the data --> <div id="results_div"> </div> </body> </html>
rect.entity { fill: purple; stroke-width: 2px;}
rect.relationship { fill: lightgreen; stroke-width: 2px;}
text.label1 {fill:white; font-size:8pt; font-family: arial; font-weight: bold;}
text.label2 {fill:blue; font-size:6pt; font-family: arial; font-weight: bold;}
span.hidden {visibility: hidden; }
span.shown {visibility: visible; }
tr.table-header {font-weight: bold; text-align: center; color: green; font-family: arial;}
h3.table_header {font-family: arial; text-align: center;}
table {font-family: arial; font-size: 80%;}
// Handle the XMLHttpRequest
// Sends the SQL text to the server-side script.  The request is
// asynchronous; getData() is called on every state change.
function sendRequest(sql) {
  xmlhttp = new XMLHttpRequest();
  if (xmlhttp != null) {
    xmlhttp.onreadystatechange = getData; // getData is our callback method
    // Percent-encode the statement: raw SQL contains spaces, quotes and "=",
    // and possibly "&" or "%", any of which would corrupt the query string.
    xmlhttp.open("GET", "/bmi280/cgi-bin/getBmi280Table.py?sql=" + encodeURIComponent(sql), true);
    xmlhttp.send(null);
  }
}

// This method gets called whenever the object state changes
function getData() {
  // Are we complete?
  if (xmlhttp.readyState != 4) {
    return;
  }
  // Do we have a good http status?
  if (xmlhttp.status != 200) {
    alert("Unable to contact AJAX server: " + xmlhttp.status);
    return;
  }
  // responseXML holds the XML document, which we can address using the DOM.
  // If we only wanted the raw text, we could get xmlhttp.responseText.
  var response = xmlhttp.responseXML;

  // Use the DOM to get the results table from the server.
  var newChild = response ? response.getElementById("results_table") : null;
  if (newChild == null) {
    // Passing null to appendChild would throw, so report the problem instead.
    alert("No results table in the server response");
    return;
  }

  // Get a handle on the results div and add in our results table.
  var tableDiv = document.getElementById("results_div");
  tableDiv.appendChild(newChild);
}
var elementShown = null;  // the input <span> currently visible, if any
var xmlhttp = null;       // the request object shared by sendRequest/getData
var selectedRect = null;  // the SVG rectangle currently outlined, if any

// showInput just controls the presentation of the name
// of the row we are looking for
function showInput(elementID, rect) {
  // Look up the input <span> that belongs to the clicked shape
  var element = document.getElementById(elementID);

  // Do we already have a text input element showing?  If so, hide it
  if (elementShown != null) {
    elementShown.className = "hidden";
  }

  // Do we already have a rectangle highlighted?  If so, remove its outline
  if (selectedRect != null) {
    selectedRect.setAttributeNS(null, "stroke", "none");
  }

  // Show the text input
  element.className = "shown";
  elementShown = element;

  // Outline the element the user clicked on
  // Note that we need to use setAttributeNS for SVG attributes
  rect.setAttributeNS(null, "stroke", "black");
  selectedRect = rect;
}

// This is the method that gets called when a text field is changed.
//   tableName - table name(s) for the FROM clause
//   column    - column compared against the text the user entered
//   textField - the <input> element that changed
//   fields    - comma-separated column list for the SELECT
//   where     - extra condition (used for joins), or null
function getTable(tableName, column, textField, fields, where) {
  var text = textField.value; // This contains the value the user entered

  // Now, create the SELECT statement
  var sql = 'SELECT ' + fields + ' from ' + tableName;
  if (text.length >= 2 || where != null) {
    sql += ' where ';
    if (text.length >= 2) {
      // Quote the value as a standard SQL string literal and double any
      // embedded single quote, so the entered text cannot end the literal.
      sql += column + " = '" + text.replace(/'/g, "''") + "'";
      if (where != null) {
        sql += ' AND ' + where;
      }
    } else {
      sql += where;
    }
  }
  sql += ';';

  // Uncomment the next line to see what we pulled together
  // alert(sql);

  // Issue the request.  Because our XMLHttpRequest call is
  // asynchronous, this will return immediately
  sendRequest(sql);

  // Clear the text field
  textField.value = "";

  // Add a header (declared with var so it does not leak into the globals)
  var header = document.getElementById("table_header");
  header.innerHTML = tableName;

  // Clear the old table
  var tableDiv = document.getElementById("results_div");
  while (tableDiv.firstChild) {
    tableDiv.removeChild(tableDiv.firstChild);
  }
}
#! /usr/local/bin/python import cgi import cgitb import sys import sqlite3 def returnError(errorString): print """<html xmlns="http://www.w3.org/1999/xhtml"> <body> <h3 id="results_table" style="color:red;">%s</h3> </body> </html>"""%errorString cgitb.enable() print "Content-type: text/xml" print "" print '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'# Get the form data form = cgi.FieldStorage() if not (form.has_key("sql")): returnError("No SQL string?") sys.exit(0) sqlStatement = form["sql"].value rows = None try: conn = sqlite3.connect ("/home/socr/b/bmi280/bmi280.db") cursor = conn.cursor() cursor.execute(sqlStatement) rows = cursor.fetchall() cursor.close() conn.commit() conn.close() except sqlite3.Error, e: returnError(e.args[1]) sys.exit(0) print '<html xmlns="http://www.w3.org/1999/xhtml">' print '<body>' print '<table id="results_table" border="1" width="80%" align="center">' print '<tr class="table-header">', for column in cursor.description: print '<td>'+column[0]+'</td>', print '</tr>' for row in rows: print '<tr>', for cell in row: print '<td>'+str(cell)+'</td>', print '</tr>' print '</table>' print '</body>' print '</html>'
Kid
in Pythonmessage_form.py opens messages.txt, reads lines, closes filemessages.txt, reads the same lines, closes filePython Cookbook
includes a generic file locking function that works on both Unix and Windows
# Get existing messages.
# Open for read and write, and take an exclusive lock before touching the
# contents.
msgfile = open('messages.txt', 'r+')
fcntl.flock(msgfile.fileno(), fcntl.LOCK_EX)
try:
    lines = [x.rstrip() for x in msgfile.readlines()]

    # Add more data?
    form = cgi.FieldStorage()
    if 'newmessage' in form:
        lines.append(form.getfirst('newmessage'))
        msgfile.seek(0)
        for line in lines:
            print >> msgfile, line
        # Push the buffered text to the file while the lock is still held.
        # Otherwise it is only written by close(), after the unlock, and
        # another process could read the old contents in between.
        msgfile.flush()
        # Drop any leftover bytes if the rewritten text is shorter than
        # the old contents (trailing whitespace was stripped on read).
        msgfile.truncate()
finally:
    # Unlock and close, even if something above failed.
    fcntl.flock(msgfile.fileno(), fcntl.LOCK_UN)
    msgfile.close()
Figure 10: Cookies
Cookie.SimpleCookie
Don't use SmartCookie: it is potentially insecure
Incoming cookies arrive in the environment variable "HTTP_COOKIE"
Create a SimpleCookie
Pass the "HTTP_COOKIE" value to the cookie's load method
# Get old count.
count = 0
if os.environ.has_key('HTTP_COOKIE'):
cookie = Cookie.SimpleCookie()
cookie.load(os.environ['HTTP_COOKIE'])
if cookie.has_key('count'):
count = int(cookie['count'].value)
# Create new count.
count += 1
cookie = Cookie.SimpleCookie()
cookie['count'] = count
# Display.
print 'Content-Type: text/html'
print cookie
print
print '<html><body>'
print '<p>Visits: %d</p>' % count
print '</body></html>'
time.asctime(time.gmtime()) to create the value