# a5_2.py

# Assumptions built into this code:
# - urlopen will never fail
# - each chemicalName element is entirely on one line, with no markup
#   between its opening and closing tags

def get_pdb_ligands(code):
    """Return list of ligand names for given PDB code.

    Fetches the ligandInfo document for ``code`` from the RCSB REST URL
    below and scans it line by line, collecting every chemical name that
    ``extract_ligand`` finds.

    Args:
        code: PDB structure identifier, interpolated into the request URL
            as-is (e.g. "4HHB").

    Returns:
        A list of ligand name strings in the order they appear in the
        response; empty if no chemical names are found.

    Network errors are not handled here; anything raised by ``urlopen``
    propagates to the caller.
    """
    import io
    import urllib.request

    url = "http://www.rcsb.org/pdb/rest/ligandInfo?structureId=%s" % code
    ligands = []
    with urllib.request.urlopen(url) as response:
        # Decode with the charset the server declares rather than the
        # platform locale default, which varies between machines and can
        # mangle non-ASCII names. Fall back to UTF-8 when none is declared.
        charset = response.headers.get_content_charset() or "utf-8"
        with io.TextIOWrapper(response, encoding=charset) as f:
            for line in f:
                extract_ligand(line, ligands)
    return ligands

def extract_ligand(line, ligand_list):
    """Extract ligand names from a line containing chemical names.

    Every ``<chemicalName>...</chemicalName>`` element found on the line
    has its text (stripped of surrounding whitespace) appended to
    ``ligand_list``. Lines without a complete element leave the list
    untouched.

    Args:
        line: One line of text from the ligand info response.
        ligand_list: List that is extended in place with the names found.

    Returns:
        None; results are delivered by mutating ``ligand_list``.
    """
    start_tag = "<chemicalName>"
    end_tag = "</chemicalName>"
    search_from = 0
    while True:
        tag_index = line.find(start_tag, search_from)
        if tag_index < 0:
            return
        name_index = tag_index + len(start_tag)
        # Look for the closing tag only after the opening tag, so a stray
        # closing tag earlier on the line cannot produce an empty name.
        end_tag_index = line.find(end_tag, name_index)
        if end_tag_index < 0:
            return
        ligand_list.append(line[name_index:end_tag_index].strip())
        # Continue after this element in case the line holds several.
        search_from = end_tag_index + len(end_tag)

# Demo run: print the ligand list for each sample PDB entry, in order.
for pdb_code in ("4HHB", "3FX2"):
    print(get_pdb_ligands(pdb_code))