#
# Use class instance as return value instead of tuple
#

import shelve

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# Lazily opened shelve database mapping zipcode -> ZipCode instance.
_cache = None

# Marker text that introduces the town name on a result page line.
_TOWN_KEY = "is the zip code of "


def find_zip(zipcode):
    """Return town and information for "zipcode" by querying uszip.com.

    The result is a ZipCode instance with three attributes:
    ``zipcode`` (the argument as given), ``town`` (the extracted town
    name, or "unknown" when no line contained the marker text) and
    ``info`` (a dict filled from the labels listed in Info_list).

    Results whose town was found are stored in the "uszip_cache" shelve
    file, which is opened on first use, and are returned from it on
    later calls without touching the network.
    """
    global _cache
    if _cache is None:
        _cache = shelve.open("uszip_cache")
    try:
        return _cache[zipcode]
    except KeyError:
        # Parenthesized form prints the same text on Python 2 and 3.
        print("Not in cache")

    result = ZipCode()
    result.zipcode = zipcode
    result.town = "unknown"
    result.info = dict()

    conn = urlopen("http://www.uszip.com/zip/%s" % zipcode)
    try:
        for line in conn:
            if not isinstance(line, str):
                # Python 3 yields bytes here.
                # NOTE(review): page encoding is not known from this file;
                # undecodable bytes are replaced rather than raising.
                line = line.decode("utf-8", "replace")
            if _TOWN_KEY in line:
                result.town = _get_town(line)
            else:
                _check_info(line, result.info)
    finally:
        # Release the connection even if parsing a line raises.
        conn.close()

    # Only successful lookups are cached, so a failed lookup is retried
    # on the next call.
    if result.town != "unknown":
        _cache[zipcode] = result
    return result


class ZipCode(object):
    """Plain attribute holder for one lookup result.

    find_zip() assigns ``zipcode``, ``town`` and ``info`` on each
    instance; the class itself defines no attributes or methods.
    """
    pass


def _get_town(line):
    """Extract town name if present.

    HTML tags are removed first; the text following the marker
    "is the zip code of " is returned, or "unknown" when the marker is
    absent from the tag-stripped text.
    """
    line = _strip_tags(line)
    try:
        n = line.index(_TOWN_KEY)
    except ValueError:
        return "unknown"
    return line[n + len(_TOWN_KEY):]


# Info_list is a list of 2-tuples of (dictionary_key, label)
# for information available from uszip.com
Info_list = [
    ("population", "Population:"),
    ("housing", "Housing Units:"),
    ("land_area", "Land Area:"),
    ("water_area", "Water Area:"),
    ("latitude", "Latitude:"),
    ("longitude", "Longitude:"),
]


def _check_info(line, info):
    """Extract information if present.

    HTML tags are removed from ``line``; for the first label in
    Info_list that occurs in the remaining text, the first
    whitespace-delimited token after the label is stored in ``info``
    under that label's key. If nothing follows the label an empty string
    is stored. ``info`` is left untouched when no label matches.
    """
    line = _strip_tags(line)
    for key, label in Info_list:
        try:
            n = line.index(label)
        except ValueError:
            continue
        # split() skips any run of whitespace between label and value.
        tokens = line[n + len(label):].split(None, 1)
        info[key] = tokens[0] if tokens else ""
        break


def _strip_tags(line):
    """Remove HTML tags from string, leaving only "real" text.

    Everything from a '<' through the next '>' is dropped; the remaining
    characters are joined and surrounding whitespace is stripped.
    """
    keep = []
    in_tag = False
    for c in line:
        if in_tag:
            if c == '>':
                in_tag = False
        elif c == '<':
            in_tag = True
        else:
            keep.append(c)
    return ''.join(keep).strip()