def getDescription(pdbidList):
    """Return a list of attribute dictionaries, one per PDB entry.

    pdbidList is a sequence of PDB identifier strings; the identifiers are
    joined with commas into a single request.  Each returned dictionary
    maps the XML attribute names of one "PDB" element to their values.
    """
    dom = _getFromPDB(pdbidList)
    return _extractInfo(dom)


def _getFromPDB(pdbidList):
    """Return the PDB document object model (DOM) for the given ids.

    The request URL is printed to standard output before it is fetched.
    """
    # NOTE(review): this looks like the legacy RCSB REST endpoint, which
    # may no longer be served -- confirm before relying on it.
    PDB_URL = "http://www.rcsb.org/pdb/rest/describePDB"
    req = "%s?structureId=%s" % (PDB_URL, ",".join(pdbidList))
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(req)
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2
    from xml.dom.minidom import parse
    f = urlopen(req)
    try:
        return parse(f)
    finally:
        # Close the response even when parsing raises.
        f.close()


def _extractInfo(dom):
    """Extract the attributes of every "PDB" element in the document.

    Returns a list with one dictionary per element, mapping attribute
    name to attribute value.
    """
    return [
        dict((attr.name, attr.value) for attr in elem.attributes.values())
        for elem in dom.getElementsByTagName("PDB")
    ]


#
# Functions for manipulating dictionaries returned by "getDescription"
#
def name(d):
    """Return the "structureId" value of d in lower case."""
    return d["structureId"].lower()


def title(d):
    """Return the "title" value of d converted to title case."""
    return d["title"].title()


def dump(d):
    """Print every key/value pair of d, one tab-indented line per pair."""
    # "key" rather than "name" so the module-level name() is not shadowed.
    for key, value in d.items():
        print("\t%s:\t%s" % (key, value))


if __name__ == "__main__":
    import pprint
    pprint.pprint(getDescription(["4hhb", "1gcn"]))