| prev | Version 1107 (Mon Nov 27 20:46:08 2006) | next |
- stdin, stdout, and zero/nonzero exit codes
- "24.153.22.195"
- "www.third-bit.com"
- nslookup to talk to DNS directly

![[Sockets]](./img/client/sockets.png)
Figure 23.1: Sockets
import sys, socket buffer_size = 1024 # bytes host = '127.0.0.1' # local machine port = 19073 # hope nobody else is using it... message = 'ping!' # what to send # AF_INET means 'Internet socket'. # SOCK_STREAM means 'TCP'. sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((host, port)) # Send the message. sock.send(message) # Receive and display the reply. data = sock.recv(buffer_size) print 'client received', `data` # Tidy up. sock.close()
client received 'pong!'
import sys, socket
buffer_size = 1024 # bytes
host = '' # empty string means 'this machine'
port = 19073 # must agree with client
# Create and bind a socket.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((host, port))
# Wait for a connection request.
s.listen(True)
sock, addr = s.accept()
print 'Connected by', addr
# Receive and display a message.
data = sock.recv(buffer_size)
print 'server saw', str(data)
# Replace vowels in reply.
data = data.replace('i', 'o')
sock.send(data)
# Tidy up.
sock.close()
Connected by ('127.0.0.1', 1297)
server saw ping!
![[HTTP Request]](./img/client/http_request.png)
Figure 23.3: HTTP Request

- "GET": to fetch information
- "POST": to submit form data or upload files
- /index.html
- "HTTP/1.0"
- "HTTP/1.1"
- "Accept: text/html"
- "Accept-Language: en, fr"
- "If-Modified-Since: 16-May-2005"
- "Content-Length" header tells the server how many bytes to read

![[HTTP Response]](./img/client/http_response.png)
Figure 23.4: HTTP Response
| Code | Name | Meaning |
|---|---|---|
| 100 | Continue | Client should continue sending data |
| 200 | OK | The request has succeeded |
| 204 | No Content | The server has completed the request, but doesn't need to return any data |
| 301 | Moved Permanently | The requested resource has moved to a new permanent location |
| 307 | Temporary Redirect | The requested resource is temporarily at a different location |
| 400 | Bad Request | The request is badly formatted |
| 401 | Unauthorized | The request requires authentication |
| 404 | Not Found | The requested resource could not be found |
| 408 | Request Timeout | The server gave up waiting for the client |
| 500 | Internal Server Error | An error occurred in the server that prevented it fulfilling the request |
| 601 | Connection Timed Out | The server did not respond before the connection timed out |
| Table 23.1: HTTP Response Codes | ||
import sys, socket
buffer_size = 1024
HttpRequest = '''GET /greeting.html HTTP/1.0
'''
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(('www.third-bit.com', 80))
sock.send(HttpRequest)
response = ''
while True:
data = sock.recv(buffer_size)
if not data:
break
response += data
sock.close()
print response
HTTP/1.1 200 OK
Date: Fri, 03 Mar 2006 18:12:55 GMT
Server: Apache/2.0.54 (Debian GNU/Linux)
Last-Modified: Fri, 03 Mar 2006 18:12:23 GMT
Content-Length: 92
Content-Type: text/html

<html>
<head><title>Greeting Page</title></head>
<body>
<h1>Greetings!</h1>
</body>
</html>
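- The double parentheses in the call to sock.connect are deliberate: the address is passed as a single (host, port) tuple
- urllib
- urllib.urlopen(URL) does what your browser would do if you gave it the URL

import urllib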
instream = urllib.urlopen("http://www.third-bit.com/greeting.html")
lines = instream.readlines()
instream.close()
for line in lines:
print line,
- readlines wouldn't do the right thing if the thing being read was an image
- Use read to grab the bytes in that case

import sys, urllib, re
url = sys.argv[1]
instream = urllib.urlopen(url)
page = instream.read()
instream.close()
links = re.findall(r'href=\"[^\"]+\"', page)
temp = set()
for x in links:
x = x[6:-1] # strip off 'href="' and '"'
if x.startswith('http://'):
temp.add(x)
links = list(temp)
links.sort()
for x in links:
print x
$ python spider.py http://www.google.ca
http://groups.google.ca/grphp?hl=en&tab=wg&ie=UTF-8
http://news.google.ca/nwshp?hl=en&tab=wn&ie=UTF-8
http://scholar.google.com/schhp?hl=en&tab=ws&ie=UTF-8
http://www.google.ca/fr
- http://www.google.ca?q=Python searches for pages related to Python
- "?" separates the parameters from the rest of the URL
- "&" separates parameters from one another
- http://www.google.ca/search?q=Python&client=firefox
- What if there is a "?" or "&" in a parameter?
- Special characters are encoded as "%" followed by a 2-digit hexadecimal code
- A space is encoded as "+"

| Character | Encoding |
|---|---|
| "#" | %23 |
| "$" | %24 |
| "%" | %25 |
| "&" | %26 |
| "+" | %2B |
| "," | %2C |
| "/" | %2F |
| ":" | %3A |
| ";" | %3B |
| "=" | %3D |
| "?" | %3F |
| "@" | %40 |
| Table 23.2: URL Encoding | |
- http://www.google.ca/search?q=grade+%3D+A%2B
- urllib has functions to make this easy
- urllib.quote(str) replaces special characters in str with escape sequences
- urllib.unquote(str) replaces escape sequences with characters
- urllib.urlencode(params) takes a dictionary and constructs the entire query parameter string

import urllib
print urllib.urlencode({'surname' : 'Von Neumann', 'forename' : 'John'})
surname=Von+Neumann&forename=John
![[Web Services]](./img/client/web_services.png)
Figure 23.5: Web Services
- PyAmazon turns parameters into a URL, and converts the XML reply into Python objects

import sys, amazon
# Format multiple authors' names nicely.
def prettyName(arg):
    """Format one or more author names for display.

    A value that is not a list or tuple (for example a single name given
    as a string) is returned unchanged.  A list or tuple of names is
    joined as 'A, B and C'; one name is returned on its own, and an empty
    sequence gives an empty string.
    """
    if not isinstance(arg, (list, tuple)):
        return arg
    if len(arg) < 2:
        # Zero or one name: there is nothing to join with 'and'.
        return ''.join(arg)
    return ', '.join(arg[:-1]) + ' and ' + arg[-1]
if __name__ == '__main__':
# Get information.
key, asin = sys.argv[1], sys.argv[2]
amazon.setLicense(key)
items = amazon.searchByASIN(asin)
# Handle errors.
if not items:
print 'Nothing found for', asin
if len(items) > 1:
print len(items), 'items found for', asin
# Display information.
item = items[0]
productName = item.ProductName
ourPrice = item.OurPrice
authors = prettyName(item.Authors.Author)
print '%s: %s (%s)' % (authors, productName, ourPrice)
$ python findbook.py 123ABCDEFGHIJKL4MN56 0974514071
Greg Wilson: Data Crunching : Solve Everyday Problems Using Java, Python, and more. ($18.87)
| prev | Copyright © 2005-06 Python Software Foundation. | next |