#!/usr/bin/env python from cogent.app.util import CommandLineApplication,\ CommandLineAppResult, ResultPath from cogent.app.parameters import Parameter, FlagParameter, ValuedParameter,\ MixedParameter,Parameters, _find_synonym, is_not_None from cogent.struct.rna2d import ViennaStructure from cogent.core.alignment import DataError from cogent.parse.rnaalifold import MinimalRnaalifoldParser from random import choice __author__ = "Rob Knight" __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight"] __license__ = "GPL" __version__ = "1.5.3" __maintainer__ = "Sandra Smit" __email__ = "Sandra.Smit@colorado.edu" __status__ = "Production" class RNAfold(CommandLineApplication): """Application controller for RNAfold (in the Vienna RNA package) Manual on: http://www.tbi.univie.ac.at/~ivo/RNA/RNAfold.html http://bioweb.pasteur.fr/docs/man/man/RNAfold.1.html Parameters with default values: -T: 37 (temperature) -d: 1 (only unpaired bases in dangling ends) -S: 1.07 (scale) Input is always written to a file which is used as the application's input. StdErr is suppressed by default, but can be overruled in an instance. """ _parameters = { '-p':MixedParameter(Prefix='-',Name='p',Delimiter='',Value=False), '-C':FlagParameter(Prefix='-',Name='C'), '-T':ValuedParameter(Prefix='-',Name='T',Value=37,Delimiter=' '), '-4':FlagParameter(Prefix='-',Name=4), '-d':MixedParameter(Prefix='-',Name='d',Delimiter='',Value=1), '-noLP':FlagParameter(Prefix='-',Name='noLP'), '-noGU':FlagParameter(Prefix='-',Name='noGU'), '-noCloseGU':FlagParameter(Prefix='-',Name='noCloseGU'), '-e':ValuedParameter(Prefix='-',Name='e',Delimiter=' '), '-P':ValuedParameter(Prefix='-',Name='P',Delimiter=' '), '-nsp':ValuedParameter(Prefix='-',Name='nsp',Delimiter=' '), '-S':ValuedParameter(Prefix='-',Name='S',Value=1.07,Delimiter=' ')} _synonyms = {'Temperature':'-T','Temp':'-T','Scale':'-S'} _command = 'RNAfold' _input_handler = '_input_as_lines' _suppress_stderr = True def _input_as_path(self,filename): """Returns '>"filename"' to redirect input to stdin""" return ''.join(\ ['<', str(super(RNAfold,self)._input_as_path(filename))]) def _input_as_lines(self,data): """Returns '>"temp_filename" to redirect input to stdin""" return ''.join(['<',str(super(RNAfold,self)._input_as_lines(data))]) def _get_result_paths(self,data): """Specifies the paths of output files generated by the application data: the data the instance of the application is called on You always get back: StdOut,StdErr, and ExitStatus RNAfold can produce two additional output files: a secondary structure structure graph. Default name: rna.ps a dot plot of the base pairing matrix. Default name: dp.ps The default names are used for unnamed sequences. Files are created in the current working directory. You can make a sequence named by inserting a line '>name' above it in your input file (or list of sequences). The ss and dp files for named sequences will be written to name_ss.ps and name_dp.ps """ result = {} name_counter = 0 seq_counter = 0 if not isinstance(data,list): #means data is file data = open(data).readlines() for item in data: if item.startswith('>'): name_counter += 1 name = item.strip('>\n') result[(name+'_ss')] =\ ResultPath(Path=(self.WorkingDir+name+'_ss.ps')) result[(name+'_dp')] =\ ResultPath(Path=(self.WorkingDir+name+'_dp.ps'),\ IsWritten=self.Parameters['-p'].isOn()) else: seq_counter += 1 result['SS'] = ResultPath(Path=self.WorkingDir+'rna.ps',\ IsWritten=seq_counter - name_counter > 0) #Secondary Structure result['DP'] = ResultPath(Path=self.WorkingDir+'dot.ps', IsWritten=(self.Parameters['-p'].isOn() and\ seq_counter - name_counter > 0)) #DotPlot return result def get_constrained_fold(sequence, constraint_string, params=None): """Returns secondary structure from RNAfold with constraints. - sequence: RNA sequence object or string. - constraint_string: RNAfold folding constraint string. - params: dict of additional RNAfold parameters. """ #Check that sequence and constraint string exist. if not sequence: raise ValueError, 'No sequence found!' elif not constraint_string: raise ValueError, 'No constraint string found!' sequence = str(sequence) #Check that sequence and constraint_string are equal length. if len(sequence) != len(constraint_string): raise ValueError, 'Sequence and constraint string are not same length!' app = RNAfold(params=params) #Turn on constrained folding. app.Parameters['-C'].on() res = app([sequence,constraint_string]) #Parse out seq, struct string and energy. seq, struct, energy = MinimalRnaalifoldParser(res['StdOut'].readlines())[0] #Clean up after application. res.cleanUp() return seq, struct, energy def get_secondary_structure(sequence, params=None): """Returns secondary structure from RNAfold. - sequence: RNA sequence object or string. - params: dict of additional RNAfold parameters. """ #Check that sequence and constraint string exist. if not sequence: raise ValueError, 'No sequence found!' sequence = str(sequence) app = RNAfold(params=params) res = app([sequence]) #Parse out seq, struct string and energy. seq, struct, energy = MinimalRnaalifoldParser(res['StdOut'].readlines())[0] #Clean up after application. res.cleanUp() return seq, struct, energy class RNAsubopt(CommandLineApplication): """Application controller for RNAsubopt (in the Vienna RNA package) Manual on: http://www.tbi.univie.ac.at/~ivo/RNA/RNAsubopt.html http://bioweb.pasteur.fr/docs/man/man/RNAsubopt.1.html Parameters with default values: -e: 1 (range) -T: 37 (temperature) -d: 2 (dangling ends as in partition function folding) Input is always written to a file which is used as the application's input. StdErr is suppressed by default, but can be overwritten in an instance. """ _parameters = { '-p':ValuedParameter(Prefix='-',Name='p',Delimiter=' '), '-C':FlagParameter(Prefix='-',Name='C'), '-e':ValuedParameter(Prefix='-',Name='e',Delimiter=' ',Value=1), '-ep':ValuedParameter(Prefix='-',Name='ep',Delimiter=' '), '-s':FlagParameter(Prefix='-',Name='s'), '-lodos':FlagParameter(Prefix='-',Name='lodos'), '-T':ValuedParameter(Prefix='-',Name='T',Value=37,Delimiter=' '), '-4':FlagParameter(Prefix='-',Name=4), '-d':MixedParameter(Prefix='-',Name='d',Delimiter='',Value=2), '-noGU':FlagParameter(Prefix='-',Name='noGU'), '-noCloseGU':FlagParameter(Prefix='-',Name='noCloseGU'), '-P':ValuedParameter(Prefix='-',Name='P',Delimiter=' '), '-logML':FlagParameter(Prefix='-',Name='logML'), '-nsp':ValuedParameter(Prefix='-',Name='nsp',Delimiter=' '), '-noLP':FlagParameter(Prefix='-',Name='noLP')} _synonyms = {'Temperature':'-T','Temp':'-T','EnergyRange':'-e','Sort':'-s'} _command = 'RNAsubopt' _input_handler = '_input_as_lines' _suppress_stderr = True def _input_as_path(self,filename): """Returns '>"filename"' to redirect input to stdin Includes quotes to handle file names containing spaces. """ return ''.join(\ ['<',str(super(RNAsubopt,self)._input_as_path(filename))]) def _input_as_lines(self,data): """Returns '>temp_filename to redirect input to stdin Includes quotes to handle file names containing spaces. """ return ''.join(['<',str(super(RNAsubopt,self)._input_as_lines(data))]) class RNAplot(CommandLineApplication): """Application controller for RNAplot (in the Vienna RNA package) Manual on: http://www.tbi.univie.ac.at/~ivo/RNA/RNAplot.html Input is always written to a file which is used as the application's input. StdErr is suppressed by default, but can be overwritten in an instance. """ _parameters = { #-t 0|1 Choose the layout algorithm. Simple radial layout if 0, or # naview if 1. Default is 1. '-t':ValuedParameter(Prefix='-',Name='t',Delimiter=' '),\ #-o ps|gml|xrna|svg # Specify output format. Available formats are: PostScript (ps), # Graph Meta Language (gml), Scalable Vector Graphics (svg), and # XRNA save file (xrna). Output filenames will end in ".ps" ".gml" # ".svg" ".ss", respectively. Default is ps. '-o':ValuedParameter(Prefix='-',Name='o',Delimiter=' '),\ #--pre string --post string # Add annotation macros to postscript file, and add the postscript # code in "string" just before (--pre) or after (--post) the code # to draw the structure. This is an easy way to add annotation, # e.g to mark position 15 with circle use --post "15 cmark" '--pre':ValuedParameter(Prefix='--',Name='pre',Delimiter=' '),\ '--post':ValuedParameter(Prefix='--',Name='post',Delimiter=' '),\ } _command = 'RNAplot' _input_handler = '_input_as_lines' _suppress_stderr = True FILETYPE_TO_EXTENSION = {'ps':'.ps',\ 'gml':'.gml',\ 'svg':'.svg',\ 'xrna':'.ss' } def getHelp(self): """Method that points to the RNAplot documentation.""" help_str = \ """ See RNAplot documentation at: http://www.tbi.univie.ac.at/~ivo/RNA/RNAplot.html """ return help_str def _get_tmp_seqname(self): """Returns a temporary name for sequence. - Useful since RNAplot names plot files according to sequence name when available. Will use the 'rna' prefix if no sequence name is supplied. - Note only takes first 12 characters as the name. So this function will only use make a name 9 long with 'tmp' as a prefix. """ chars = "abcdefghigklmnopqrstuvwxyz" picks = chars + chars.upper() + "0123456790" return 'tmp'+''.join([choice(picks) for i in range(9)]) def _input_as_path(self,filename): """Returns '>"filename"' to redirect input to stdin Includes quotes to handle file names containing spaces. """ return ''.join(\ ['<',str(super(RNAplot,self)._input_as_path(filename))]) def _input_as_lines(self,data): """Returns '>temp_filename to redirect input to stdin Includes quotes to handle file names containing spaces. """ return ''.join(['<',str(super(RNAplot,self)._input_as_lines(data))]) def _get_outfile_extension(self): """Returns file extension of RNAplot. """ if not self.Parameters['-o'].isOn(): return '.ps' else: filetype = self._absolute(str(self.Parameters['-o'].Value)) return self.FILETYPE_TO_EXTENSION[filetype] def _get_result_paths(self,data): """Specifies the paths of output files generated by the application data: the data the instance of the application is called on You always get back: StdOut,StdErr, and ExitStatus RNAplot can produce one output files: a secondary structure structure graph. The default names are used for unnamed sequences. Files are created in the current working directory. You can make a sequence named by inserting a line '>name' above it in your input file (or list of sequences). The ss files for named sequences will be written to name_ss.ps. """ result = {} name_counter = 0 seq_counter = 0 if not isinstance(data,list): #means data is file data = open(data).readlines() res_extension = self._get_outfile_extension() for item in data: if item.startswith('>'): name_counter += 1 name = item.strip('>\n') result[(name+'_ss')] =\ ResultPath(Path=(self.WorkingDir+name+'_ss'+res_extension)) else: seq_counter += 1 if (seq_counter/2 - name_counter > 0): result['SS'] = ResultPath(Path=self.WorkingDir+'rna'+res_extension,\ IsWritten=True) #Secondary Structure return result def plot_from_seq_and_struct(seq, struct,seqname=None, params=None): """Returns plot of structure given seq and struct. - seq: sequence corresponding to struct. - Can be anything that behaves as a string same length as struct. - struct: Vienna structure corresponding to seq. Must be a valid Vienna structure. """ seq = str(seq) #Construct ViennaStructure. Object will handle errors in struct string. struct = ViennaStructure(struct) if len(seq) != len(struct): raise DataError, 'Sequence and structure are not same length!' app = RNAplot(WorkingDir='/tmp',\ params=params) if seqname is None: seqname = app._get_tmp_seqname() res = app(['>'+seqname,seq,struct]) plot = res[seqname+'_ss'].read() res.cleanUp() return plot