#
# This file demonstrates using a basic class
#
# Conrad Huang
# November 2, 2004
#
# A FASTA file contains a series of sequences, each of which
# consists of a one-line description followed by any number of
# lines of nucleotide data.  The description line begins with
# the '>' character.  The data lines consist of single-character
# nucleotide or amino acid symbols, but many programs (including
# this one) treat any lines that do not begin with '>' as data lines.
# FastaReader is an implementation of a FASTA file reader using
# classic constructs available in nearly all versions of Python.
# This module may be tested by executing it as a Python script.
# Command line arguments are treated as the names of FASTA files.
# If no FASTA file is specified, the default test case of "ncbi.fa" is used.

"Read sequences in FASTA format."

# String types accepted as a filename.  Python 2 has a common base class
# for str and unicode; Python 3 only has str.
try:
    _STRING_TYPES = (basestring,)
except NameError:
    _STRING_TYPES = (str,)


class FastaSequence:
    "A FASTA sequence and its description."

    def __init__(self, desc, seq):
        """Store one record.

        desc is the raw description line; its first character (normally
        the '>' marker) is dropped and surrounding whitespace is stripped.
        seq is stored unchanged as the sequence data.
        """
        self.description = desc[1:].strip()
        self.sequence = seq


class FastaReader:
    "Read generic FASTA files.  (Classic version)"

    # Number of readers created on input that had at least one line.
    count = 0

    def __init__(self, f):
        """Prepare to read FASTA records from f.

        f is either a filename (opened here and closed automatically by
        close()) or an object with a readline() method (left open for
        the caller to close).  Anything else raises ValueError.
        """
        # Give every attribute that close() and __del__ touch a safe value
        # before anything below can raise; otherwise a failed constructor
        # makes __del__ fail with AttributeError.
        self.f = None
        self.autoClose = False
        self.name = ""
        self.description = ""
        self.finished = True

        if isinstance(f, _STRING_TYPES):
            # Argument is a string, treat as filename
            self.f = open(f)
            self.autoClose = True
            self.name = f
        elif hasattr(f, "readline"):
            # Argument is a file(-like) object owned by the caller
            self.f = f
        else:
            raise ValueError("expecting string or file object")

        # The first line is taken as the description of the first record.
        # readline() signals end of file by returning an empty string (it
        # never raises StopIteration), so empty input is detected here
        # rather than producing a bogus empty record later.
        self.description = self.f.readline()
        if not self.description:
            self.close()
        else:
            self.finished = False
            FastaReader.count += 1

    def __del__(self):
        self.close()

    def getName(self):
        "Return the filename, or '' when reading from a file object."
        return self.name

    def nextSequence(self):
        """Return the next record, or None when the input is exhausted.

        The record is built by makeSequence(), so subclasses can change
        the returned type by overriding that method.
        """
        if self.finished:
            return None
        # We have the sequence description from a previous
        # call or from __init__.  Now we read sequence data
        # until we hit the next description or the end of file.
        desc = self.description
        seq = []
        while True:
            line = self.f.readline()
            if not line:
                # End of file: the record collected so far is the last one.
                self.close()
                break
            if line.startswith('>'):
                # Remember the next record's description for the next call.
                self.description = line
                break
            seq.append(line.strip())
        return self.makeSequence(desc, ''.join(seq))

    def makeSequence(self, desc, seq):
        "Build the object returned by nextSequence() for one record."
        return FastaSequence(desc, seq)

    def close(self):
        """Mark the reader finished and release the file.

        The underlying file is only closed when this reader opened it
        itself.  Calling close() more than once is harmless.
        """
        if self.f is not None and self.autoClose:
            self.f.close()
        self.f = None
        self.finished = True


if __name__ == "__main__":
    def readFasta(filename):
        "Print description, length and data of every record in filename."
        fa = FastaReader(filename)
        try:
            while True:
                fs = fa.nextSequence()
                if fs is None:
                    break
                # %-formatting keeps the output identical on Python 2 and 3.
                print("%s %s %s" % (fs.description, len(fs.sequence),
                                    fs.sequence))
                print("")
        finally:
            fa.close()

    def main():
        "Read the files named on the command line, or ncbi.fa by default."
        import sys
        args = sys.argv[1:]
        if not args:
            readFasta("ncbi.fa")
        else:
            for filename in args:
                readFasta(filename)

    main()