#
# Add caching so we don't visit the web site so often
#

# Shelf of previously fetched (town, info) results keyed by zip code.
# Stays None until the first call to find_zip() opens it.
_cache = None

def find_zip(zipcode):
	"""Return town and information for "zipcode" by querying uszip.com

	The result is a 2-tuple (town, info).  "town" is the text found
	after "is the zip code of " on the page, or "unknown" if no line
	contains that phrase.  "info" is a dictionary filled in by
	_check_info() from the remaining lines.

	Results are kept in the shelf "uszip_cache", opened on first use.
	A zip code already in the shelf is returned without any network
	access; a lookup whose town is "unknown" is not stored, so it is
	retried on the next call.
	"""
	global _cache
	if _cache is None:
		import shelve
		_cache = shelve.open("uszip_cache")
	try:
		return _cache[zipcode]
	except KeyError:
		# Parenthesised single argument: prints the same text whether
		# print is a statement or a function.
		print("Not in cache")
	import urllib
	town = "unknown"
	info = dict()
	conn = urllib.urlopen("http://www.uszip.com/zip/%s" % zipcode)
	try:
		for line in conn:
			if "is the zip code of " in line:
				town = _get_town(line)
			else:
				_check_info(line, info)
	finally:
		# Release the connection even if reading or parsing fails.
		conn.close()
	if town != "unknown":
		_cache[zipcode] = (town, info)
		# The shelf is never explicitly closed, so push the new entry
		# to disk now rather than relying on interpreter shutdown.
		_cache.sync()
	return town, info

def _get_town(line):
	"""Return the text following "is the zip code of " in "line".

	HTML tags are removed first.  If the phrase does not occur in the
	remaining text, "unknown" is returned.
	"""
	marker = "is the zip code of "
	text = _strip_tags(line)
	_, found, town = text.partition(marker)
	if not found:
		return "unknown"
	return town

# Attributes extracted from a uszip.com page.  Each entry is a 2-tuple
# (dictionary_key, label): "label" is the text searched for in a line
# and "dictionary_key" is the key the value is stored under.
Info_list = [
	("population", "Population:"),
	("housing", "Housing Units:"),
	("land_area", "Land Area:"),
	("water_area", "Water Area:"),
	("latitude", "Latitude:"),
	("longitude", "Longitude:"),
]
def _check_info(line, attrs):
	"""Extract attribute if present

	"line" is stripped of HTML tags and searched for each label in
	Info_list, in order.  For the first label found, the first
	whitespace-delimited word after the label is stored in "attrs"
	under the matching key (an empty string if nothing follows the
	label).  At most one attribute is recorded per line; "attrs" is
	left untouched when no label is present.
	"""
	text = _strip_tags(line)
	for key, label in Info_list:
		_, found, rest = text.partition(label)
		if not found:
			continue
		# split() skips any amount of whitespace between the label and
		# the value, so a value preceded by several blanks is no longer
		# recorded as an empty string.
		words = rest.split(None, 1)
		if words:
			attrs[key] = words[0]
		else:
			attrs[key] = ""
		break

def _strip_tags(line):
	"""Remove HTML tags from string, leaving only "real" text"""
	keep = []
	in_tag = False
	for c in line:
		if in_tag:
			if c == '>':
				in_tag = False
		else:
			if c == '<':
				in_tag = True
			else:
				keep.append(c)
	return ''.join(keep).strip()