"""Store protein backbone data and compute phi and psi angles.""" # Protein p is a list of chains. # Chain c is a list of amino acids. # Each amino acid is a 8-list of [seq, chain, type, N, CA, C, phi, psi]. # N, CA, C is each a coordinate (a cartesian module Point). # # Constructors # def makeAminoAcid(seq, chain, type, N, CA, C, phi, psi): "Amino acid constructor" return [ seq, chain, type, N, CA, C, phi, psi ] # # Access functions for proteins, chains and amino acids # def chains(p): return p def aminoAcids(c): return c def aaSeq(aa): return aa[0] def aaChain(aa): return aa[1] def aaType(aa): return aa[2] def aaAtomN(aa): return aa[3] def aaAtomCA(aa): return aa[4] def aaAtomC(aa): return aa[5] def aaPhi(aa): return aa[6] def aaPsi(aa): return aa[7] # # Functions that actually do some work # def computePhiPsi(p): "Compute the phi and psi angles of all amino acids in all chains." for c in p: _computeChainPhiPsi(c) def _computeChainPhiPsi(c): "Compute the phi and psi angles of all amino acids" for i in range(1, len(c)): c[i][6] = _computePhi(c[i - 1], c[i]) for i in range(0, len(c) - 1): c[i][7] = _computePsi(c[i], c[i + 1]) def _computePhi(prev, this): "Compute the phi angle of this amino acid." from cartesian import dihedral return dihedral(prev[5], this[3], this[4], this[5]) def _computePsi(this, next): "Compute the psi angle of this amino acid." from cartesian import dihedral return dihedral(this[3], this[4], this[5], next[3]) def readPDB(f, fname): "Read in PDB format input file and return a Protein instance." import logging import pdb from cartesian import Point p = [] c = [] rSeq = None rChain = None rType = None bb = {} # backbone map saved = False # has this amino acid been added already? for a in pdb.read(f, fname): if a is None: # Chain break if c: p.append(c) c = [] continue atomName, resType, resChain, resSeq, x, y, z = a atomName = atomName.strip() if atomName not in [ "N", "CA", "C" ]: # Ignore non-backbone atoms logging.debug("skipping atom %s" % atomName) continue if rSeq != resSeq or rChain != resChain or rType != resType: rSeq = resSeq rType = resType rChain = resChain bb = {} saved = False bb[atomName] = Point(x, y, z) logging.debug("saved atom %s, len=%d", atomName, len(bb)) if len(bb) == 3 and not saved: # We saw an amino acid since all three # keys (N, CA and C) are in our dictionary aa = makeAminoAcid(rSeq, rChain, rType, bb["N"], bb["CA"], bb["C"], None, None) c.append(aa) saved = True if c: p.append(c) return p # # Input reader registry. # "readers" is a map from file extensions to an input reader. # When called to read a file (via module function "read"), we # check to see if we recognize the file type by looking for # an input reader for the file extension (eg ".pdb"). If so, # we call the function and return its value; otherwise, we # raise an exception and let the upper layers handle it. # readers = {} def registerReader(ext, func): "Register a reader for a files with given extension." readers[ext] = func registerReader(".pdb", readPDB) registerReader(".PDB", readPDB) registerReader(".ent", readPDB) registerReader(".ENT", readPDB) def read(f, fname): "Convert data from input file into a Protein instance." import os.path if fname[0] == '<': ext = ".pdb" else: root, ext = os.path.splitext(fname) try: func = readers[ext] except KeyError: raise IOError("unsupported file extension: %s" % ext) else: return func(f, fname)