#
# Add special methods to ZipCode class so code looks cleaner
#
"""Look up the town and basic statistics for a US zip code.

Results are scraped from uszip.com and remembered in a ``shelve`` file
named "uszip_cache" in the current working directory.
"""

import shelve

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# Lazily opened shelve database shared by every call to find_zip().
_cache = None

# Placeholder town name meaning "no town was found".
_UNKNOWN_TOWN = "unknown"

# Phrase on the result page that immediately precedes the town name.
_TOWN_KEY = "is the zip code of "


def find_zip(zipcode):
    """Return town and information for "zipcode" by querying uszip.com.

    The on-disk cache is consulted first; on a miss the page
    http://www.uszip.com/zip/<zipcode> is downloaded and parsed.

    zipcode -- the zip code to look up.  It is used as the shelve key and
               interpolated into the URL (presumably a string; shelve keys
               must be strings).

    Returns a ZipCode instance.  Only lookups that actually produced a town
    name are stored in the cache, so a failed lookup is retried next time.
    """
    global _cache
    if _cache is None:
        _cache = shelve.open("uszip_cache")
    try:
        return _cache[zipcode]
    except KeyError:
        print("Not in cache")

    conn = urlopen("http://www.uszip.com/zip/%s" % zipcode)
    try:
        result = _parse_page(zipcode, conn)
    finally:
        # Release the connection even when parsing raises.
        conn.close()

    # The default town is the truthy placeholder "unknown", so a plain
    # truthiness test would cache failed lookups as well.
    if result.town and result.town != _UNKNOWN_TOWN:
        _cache[zipcode] = result
        # The shelf is never closed explicitly in this module, so flush
        # each new entry to disk right away.
        _cache.sync()
    return result


def _parse_page(zipcode, lines):
    """Build a ZipCode for "zipcode" from an iterable of HTML lines."""
    result = ZipCode(zipcode)
    for line in lines:
        # On Python 3 the HTTP response yields bytes; on Python 2 "bytes"
        # is "str" and the line is used unchanged.
        # NOTE(review): assumes the page is UTF-8 compatible -- confirm;
        # undecodable bytes are replaced rather than raising.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode("utf-8", "replace")
        if _TOWN_KEY in line:
            town = _get_town(line)
            # The phrase may occur only inside a tag (e.g. an attribute),
            # in which case no town is extracted; never let that overwrite
            # a town that was already found.
            if town and town != _UNKNOWN_TOWN:
                result.town = town
        else:
            _check_info(line, result.info)
    return result


class ZipCode(object):
    """A zip code together with its town name and an info dictionary.

    Attributes:
      zipcode -- the value passed to the constructor
      town    -- town name, "unknown" until one has been found
      info    -- dict keyed by the keys of _Info_map, string values
    """

    def __init__(self, zipcode):
        self.zipcode = zipcode
        self.town = _UNKNOWN_TOWN
        self.info = dict()

    def __str__(self):
        # A town of None is tolerated and rendered like the placeholder.
        town = _UNKNOWN_TOWN if self.town is None else self.town
        return "%s -> %s" % (self.zipcode, town)

    def __repr__(self):
        return "%s(%r, town=%r, info=%r)" % (
            type(self).__name__, self.zipcode, self.town, self.info)


def _get_town(line):
    """Extract town name if present.

    Returns the text following "is the zip code of " in the tag-stripped
    line, or "unknown" when the phrase is not present after stripping.
    """
    text = _strip_tags(line)
    _, found, town = text.partition(_TOWN_KEY)
    return town if found else _UNKNOWN_TOWN


# Info_map is map of dictionary key
# to label for information available from uszip.com
_Info_map = {
    "population": "Population:",
    "housing": "Housing Units:",
    "land_area": "Land Area:",
    "water_area": "Water Area:",
    "latitude": "Latitude:",
    "longitude": "Longitude:",
}


def _check_info(line, info):
    """Extract information if present.

    For every label of _Info_map found in the tag-stripped line, store the
    first whitespace-delimited token that follows the label in
    info[key].  An empty string is stored when nothing follows the label.
    """
    text = _strip_tags(line)
    for key, label in _Info_map.items():
        pos = text.find(label)
        if pos < 0:
            continue
        # Taking the first token copes with any amount of whitespace
        # between the label and its value.
        tokens = text[pos + len(label):].split(None, 1)
        info[key] = tokens[0] if tokens else ""


def print_info(zip):
    """Print each entry of zip.info as a tab-indented "label<TAB>value" line.

    The parameter keeps its original name (which shadows the builtin) so
    that existing keyword callers continue to work.
    """
    for key, value in zip.info.items():
        print("\t%s\t%s" % (_Info_map[key], value))


def _strip_tags(line):
    """Remove HTML tags from string, leaving only "real" text.

    Everything from a "<" up to and including the next ">" is dropped (an
    unterminated tag swallows the rest of the string); the remainder is
    returned with leading and trailing whitespace removed.
    """
    keep = []
    in_tag = False
    for c in line:
        if in_tag:
            if c == '>':
                in_tag = False
        elif c == '<':
            in_tag = True
        else:
            keep.append(c)
    return ''.join(keep).strip()