"""Store protein backbone data and compute phi and psi angles.""" from cartesian import dihedral class Protein: "A protein consists of one or more chains of amino acids." def __init__(self, name): self.name = name self.chains = [] def addChain(self, chain): "Add another chain into the molecule" self.chains.append(chain) def computePhiPsi(self): "Compute the phi and psi angles of all amino acids" for chain in self.chains: chain.computePhiPsi() class Chain: "A chain consists of amino acids." def __init__(self): self.aa = [] def addAminoAcid(self, aa): "Add another amino acid to this chain" self.aa.append(aa) def computePhiPsi(self): "Compute the phi and psi angles of all amino acids" for i in range(1, len(self.aa)): self.computePhi(self.aa[i - 1], self.aa[i]) for i in range(0, len(self.aa) - 1): self.computePsi(self.aa[i], self.aa[i + 1]) def computePhi(self, prev, this): "Compute the phi angle of this amino acid." this.phi = dihedral(prev.C.coord, this.N.coord, this.CA.coord, this.C.coord) def computePsi(self, this, next): "Compute the psi angle of this amino acid." this.psi = dihedral(this.N.coord, this.CA.coord, this.C.coord, next.N.coord) class AminoAcid: "An amino acid has backbone atoms N, CA (C alpha) and C." def __init__(self, seq, chain, type, N, CA, C): self.seq = seq self.chain = chain self.type = type self.N = N self.CA = CA self.C = C self.phi = None self.psi = None class Atom: "An atom is part of an amino acid and has a name and coordinates." def __init__(self, aa, name, coord): self.aa = aa self.name = name self.coord = coord def readPDB(f, fname): "Read in PDB format input file and return a Protein instance." import logging import pdb p = Protein(fname) c = Chain() rSeq = None rChain = None rType = None bb = {} # backbone map saved = False # has this amino acid been added already? for a in pdb.read(f, fname): if a is None: # Chain break if c.aa: p.addChain(c) c = Chain() continue if a.aName not in [ "N", "CA", "C" ]: # Ignore non-backbone atoms logging.debug("skipping atom %s" % a.aName) continue if rSeq != a.rSeq or rChain != a.rChain or rType != a.rType: rSeq = a.rSeq rType = a.rType rChain = a.rChain bb = {} saved = False bb[a.aName] = a logging.debug("saved atom %s, len=%d", a.aName, len(bb)) if len(bb) == 3 and not saved: # We saw an amino acid since all three # keys (N, CA and C) are in our dictionary aa = AminoAcid(rSeq, rChain, rType, bb["N"], bb["CA"], bb["C"]) c.addAminoAcid(aa) saved = True if c.aa: p.addChain(c) return p # # Input reader registry. # "readers" is a map from file extensions to an input reader. # When called to read a file (via module function "read"), we # check to see if we recognize the file type by looking for # an input reader for the file extension (eg ".pdb"). If so, # we call the function and return its value; otherwise, we # raise an exception and let the upper layers handle it. # readers = {} def registerReader(ext, func): "Register a reader for a files with given extension." readers[ext] = func registerReader(".pdb", readPDB) registerReader(".PDB", readPDB) registerReader(".ent", readPDB) registerReader(".ENT", readPDB) def read(f, fname): "Convert data from input file into a Protein instance." import os.path if fname[0] == '<': ext = ".pdb" else: root, ext = os.path.splitext(fname) try: func = readers[ext] except KeyError: raise IOError("unsupported file extension: %s" % ext) else: return func(f, fname)