# a5_2.py

# Assumptions built in to this code:
# - urlopen will never fail
# - chemicalName is all on one line with no markup in between tags

def get_pdb_ligands(code):
	"""Return list of ligand names for given PDB code.

	Requests the RCSB "ligandInfo" REST document for ``code`` and scans
	the response line by line, passing each line to extract_ligand(),
	which appends any chemical name it finds.  Names are returned in the
	order they appear in the response.

	code -- PDB structure identifier, e.g. "4HHB"; it is interpolated
	        into the request URL as-is.

	No error handling is done around the request: any exception raised
	by urlopen or while reading the response propagates to the caller.

	NOTE(review): urllib.urlopen is the Python 2 API, and this URL is the
	legacy RCSB REST endpoint -- confirm it is still being served.
	"""
	import urllib
	f = urllib.urlopen("http://www.rcsb.org/pdb/rest/ligandInfo?structureId=%s" % code)
	ligands = list()
	try:
		for line in f:
			extract_ligand(line, ligands)
	finally:
		# Release the connection even if reading or parsing raises.
		f.close()
	return ligands

def extract_ligand(line, ligand_list):
	"""Append every chemical name found in line to ligand_list.

	A name is the text between "<chemicalName>" and the next
	"</chemicalName>" that follows it on the same line, with surrounding
	whitespace stripped.  A line without a complete tag pair leaves
	ligand_list untouched.

	line -- one line of text to scan
	ligand_list -- list that is extended in place

	Returns None; results are delivered through ligand_list.
	"""
	start_tag = "<chemicalName>"
	end_tag = "</chemicalName>"
	search_from = 0
	while True:
		tag_index = line.find(start_tag, search_from)
		if tag_index < 0:
			return
		name_index = tag_index + len(start_tag)
		# Look for the closing tag only after the opening one, so a stray
		# "</chemicalName>" earlier on the line cannot yield a bogus
		# empty name.
		end_tag_index = line.find(end_tag, name_index)
		if end_tag_index < 0:
			return
		ligand_list.append(line[name_index:end_tag_index].strip())
		# Resume after this pair in case the line holds further names.
		search_from = end_tag_index + len(end_tag)

# Print the ligand list returned for each of two PDB codes, one list per line.
for pdb_code in ("4HHB", "3FX2"):
	print(get_pdb_ligands(pdb_code))