#!/usr/bin/env python
 
from cogent.util.unit_test import TestCase, main
from cogent.core.info import Info
from cogent.parse.comrna import comRNA_parser,common
 
__author__ = "Shandy Wikman"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__contributors__ = ["Shandy Wikman"]
__license__ = "GPL"
__version__ = "1.5.3-dev"
__maintainer__ = "Shandy Wikman"
__email__ = "ens01svn@cs.umu.se"
__status__ = "Development"
 
class ComrnaParserTest(TestCase):
    """Unit tests for the comRNA output parser and its ``common`` helper"""

    def setUp(self):
        """Bind the raw comRNA output and the parse result expected from it"""
        seq = 'GGCUAGAUAGCUCA'

        # Raw program output: the module-level list of output lines.
        self.comrna_out = COMRNA

        # Expected parser result: one [sequence, list of (i, j) tuples]
        # entry per sequence block in the output. The first three entries
        # carry four tuples each; a fresh list is built per entry so no two
        # entries share the same list object.
        parsed = [[seq, [(0, 13), (1, 12), (4, 9), (5, 8)]]
                  for _ in range(3)]
        # The fourth entry only has the two outermost tuples.
        parsed.append([seq, [(0, 13), (1, 12)]])
        self.comrna_exp = parsed

    def test_comrna_output(self):
        """comRNA_parser should turn the raw output into the expected list"""
        result = comRNA_parser(self.comrna_out)
        self.assertEqual(result, self.comrna_exp)

    def test_common_func(self):
        """common should map the four parsed entries to the two expected"""
        seq = 'GGCUAGAUAGCUCA'
        # For this input the expected output holds each distinct entry once.
        expected = [
            [seq, [(0, 13), (1, 12), (4, 9), (5, 8)]],
            [seq, [(0, 13), (1, 12)]],
        ]
        result = common(self.comrna_exp)
        self.assertEqual(result, expected)
 
# Captured comRNA output used as parser input by the tests: a list of output
# lines, each ending in '\n' except the final separator line.
# Every literal is kept on a single physical line, because a single-quoted
# string cannot span lines; the whitespace inside the strings is part of the
# fixture and must not be reflowed.
COMRNA = [
    'comRNA input.fasta \n',
    '\n',
    'PARAMETERS: \n',
    'L  =     4,   Minimum length of a straight stem;\n',
    'E  = -5.00,   Maximum stem energy allowed for a stem to be analyzed, in kcal/mol;\n',
    'S  =  0.00,   Minimum stem similarity score b/w two stems compared;\n',
    'Sh =  0.60,   Maximum stem similarity score threshold that will be tested;\n',
    'Sl =  0.20,   Minimum stem similarity score threshold that will be tested;\n',
    'P  =  0.50,   Minimum percentage of sequences in which a common structure should occur;\n',
    'n  =    10,   Number of common structures to be reported;\n',
    'x  =   999,   Maximum number of pseudoknot crossover pattern allowed between one stem and other stems in a structure;\n',
    'a  =     1,   Use anchor region during stem comparison;\n',
    'o  =     4,   Maximum number of overlapping nucleotides allowed between two stems;\n',
    'c  =  0.30,   Maximum percentage of stem length that is allowed overlapping between two stems;\n',
    'j  =  0.70,   Maximum percentage of stems allowed overlapping between two different cliques;\n',
    'r  =  0.40,   Minimum percentage of stems required to be same for two cliques to be considered same when reporting structures;\n',
    'f  =    10,   Number of flanking nucleotides of a stem to be refolded together during structure refinement;\n',
    'v  =     5,   Maximum length of nucleotides allowed for a new loop to deviate from its length in the original structure pattern;\n',
    'g  =     0,   Use topological sort to assemble stem blocks;\n',
    '\n',
    'Sequence file name: "input.fasta"\n',
    '\n',
    'Sequences loaded ...\n',
    '1    seq1                   72 nt\n',
    '2    seq2                   72 nt\n',
    '3    seq3                   72 nt\n',
    '4    seq4                   72 nt\n',
    '\n',
    '\n',
    'Number of stems in each energy bin for each sequence:\n',
    '\n',
    'energy[kc/m]            -10   -9   -8   -7   -6   -5   -4   -3   -2   -1    0\n',
    'seq1                    2    1    1    6    1    5    9    2    1    3    0    1\n',
    'seq2                    2    1    1    6    1    5    9    2    1    3    0    1\n',
    'seq3                    2    1    1    6    1    5    9    2    1    3    0    1\n',
    'seq4                    2    1    1    6    1    5    9    2    1    3    0    1\n',
    '\n',
    '\n',
    'Pairwise Sequence Identity (%): \n',
    '\n',
    '      1   2   3   4\n',
    '\n',
    ' 1   -  100 100 100\n',
    ' 2  100  -  100 100\n',
    ' 3  100 100  -  100\n',
    ' 4  100 100 100  - \n',
    '\n',
    'Average Pairwise Sequence Identity (%): 100\n',
    '\n',
    'Comparing stems pairwise ... \n',
    '\n',
    'Number of edges that has stem-similarity-score higher than a certain threshold in the stem graph:\n',
    '\n',
    'Score:           0.8  0.78  0.76  0.74  0.72   0.7  0.68  0.66  0.64  0.62   0.6  0.58  0.56  0.54  0.52   0.5  0.48  0.46  0.44  0.42   0.4  0.38  0.36  0.34  0.32   0.3  0.28  0.26  0.24  0.22   0.2\n',
    'Num of edges:     12    12    18    18    18    24    24    54    72    78   102   114   114   120   132   132   144   156   168   168   174   180   180   180   180   180   180   180   180   180   180   180\n',
    '\n',
    'Time spent on comparing stems: 0.03 seconds user CPU time; 0.04 seconds real time.\n',
    '\n',
    'Maximum structure finding time: 1 min\n',
    '\n',
    '===========================  S = 0.6  ===========================\n',
    '\n',
    'Find conserved stems (cliques) ... ==== 17 cliques ==== 17 unique ====\n',
    'Time spent on finding conserved stems: 0 sec CPU time; 0 sec clock time.\n',
    '\n',
    'Construct clique topological graph ... ==== 53 edges ====\n',
    'Assemble conserved stems (cliques) ... ==== 44 structures ====\n',
    'Time spent on topologically assembling conserved stems: 0 sec CPU time; 0 sec clock time.\n',
    '\n',
    'Report top 10 structures.\n',
    '-------------------------------------------\n',
    'Structure #1: Score = 10.1, pattern: 41, path: 0 1 3 , comseq: 1 2 3 4 , incompatible_seq: 0() 1() 3() \n',
    '(a)  Clique 0: OriginalScore = 3.82, ModifiedScore = 3.82\n',
    '  1, seq1                    1 GGCUAGA   7 ...  66 UCUGGCC  72  [-13 kc/m]\n',
    '  2, seq2                    1 GGCUAGA   7 ...  66 UCUGGCC  72  [-13 kc/m]\n',
    '  3, seq3                    1 GGCUAGA   7 ...  66 UCUGGCC  72  [-13 kc/m]\n',
    '  4, seq4                    1 GGCUAGA   7 ...  66 UCUGGCC  72  [-13 kc/m]\n',
    '(b)  Clique 1: OriginalScore = 3.45, ModifiedScore = 3.45\n',
    '  1, seq1                   29 GGAUUGAA  36 ...  54 UUCGAUCC  61  [-11.6 kc/m]\n',
    '  2, seq2                   29 GGAUUGAA  36 ...  54 UUCGAUCC  61  [-11.6 kc/m]\n',
    '  3, seq3                   29 GGAUUGAA  36 ...  54 UUCGAUCC  61  [-11.6 kc/m]\n',
    '  4, seq4                   29 GGAUUGAA  36 ...  54 UUCGAUCC  61  [-11.6 kc/m]\n',
    '(c)  Clique 3: OriginalScore = 2.82, ModifiedScore = 2.82\n',
    '  1, seq1                   49 GUCGG  53 UUCGAUC  61 CCGGC  65  [-8.4 kc/m]\n',
    '  2, seq2                   49 GUCGG  53 UUCGAUC  61 CCGGC  65  [-8.4 kc/m]\n',
    '  3, seq3                   49 GUCGG  53 UUCGAUC  61 CCGGC  65  [-8.4 kc/m]\n',
    '  4, seq4                   49 GUCGG  53 UUCGAUC  61 CCGGC  65  [-8.4 kc/m]\n',
    '\n',
    '\n',
    'seq1                    1 GGCUAGAUAGCUCA 14   \n',
    '                          aa  bb  bb  aa\n',
    'seq2                    1 GGCUAGAUAGCUCA 14  \n',
    '                          aa  bb  bb  aa\n',
    'seq3                    1 GGCUAGAUAGCUCA 14   \n',
    '                          aa  bb  bb  aa\n',
    'seq4                    1 GGCUAGAUAGCUCA 14   \n',
    '                          aa          aa\n',
    '\n',
    '\n',
    '-------------------------------------------',
]
 
# When executed as a script (not imported), hand control to the main()
# imported from cogent.util.unit_test.
if __name__ == '__main__':
    main()