#!/usr/bin/env python
"""Unit tests for the Nexus Parser
"""
 
from cogent.util.unit_test import TestCase, main
from cogent.parse.nexus import get_tree_info, parse_nexus_tree, parse_PAUP_log, \
     split_tree_info, parse_trans_table, parse_dnd, get_BL_table, parse_taxa, \
     find_fields
 
__author__ = "Catherine Lozupone"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Catherine Lozupone", "Rob Knight", "Micah Hamady"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Catherine Lozupone"
__email__ = "lozupone@colorado.edu"
__status__ = "Production"
 
Nexus_tree = """#NEXUS 
 
Begin trees;  [Treefile saved Wednesday, May 5, 2004  5:02 PM]
[!
>Data file = Grassland_short.nex
>Neighbor-joining search settings:
>  Ties (if encountered) will be broken systematically
>  Distance measure = Jukes-Cantor
>  (Tree is unrooted)
]
	Translate
		1 outgroup25,
		2 AF078391l,
		3 AF078211af,
		4 AF078393l,
		5 AF078187af,
		6 AF078320l,
		7 AF078432l,
		8 AF078290af,
		9 AF078350l,
		10 AF078356l,
		11 AF078306af,
		12 AF078429l,
		13 AF078256af,
		14 AF078443l,
		15 AF078450l,
		16 AF078452l,
		17 AF078258af,
		18 AF078380l,
		19 AF078251af,
		20 AF078179af,
		21 outgroup258
		;
tree PAUP_1 = [&R] (1,(2,(((3,4),(5,(((((6,10),9),(11,18)),((((7,15),19),17),(8,(12,(14,16))))),13))),20)),21);
tree PAUP_2 = [&R] (1,(2,(((3,4),(5,(((((6,10),9),(11,18)),((((7,15),19),17),(8,(12,(14,16))))),13))),20)),21);
End;""".split('\n')
 
Nexus_tree_2 = """#NEXUS 
 
Begin trees;  [Treefile saved Wednesday, June 14, 2006  11:20 AM]
[!>Neighbor-joining search settings:
>  Ties (if encountered) will be broken systematically
>  Distance measure = uncorrected ("p")
>  (Tree is unrooted)
]
tree nj = [&U] ((((((((((YA10260L1:0.01855,SARAG06_Y:0.00367):0.01965,(((YA270L1G0:0.01095,SARAD10_Y:0.00699):0.01744,YA270L1A0:0.04329):0.00028,((YA165L1C1:0.01241,SARAA02_Y:0.02584):0.00213,((YA165L1H0:0.00092,SARAF10_Y:-0.00092):0.00250,(YA165L1A0:0.00177,SARAH10_Y:0.01226):0.00198):0.00131):0.00700):0.01111):0.11201,(YA160L1F0:0.00348,SARAG01_Y:-0.00122):0.13620):0.01202,((((YRM60L1D0:0.00357,(YRM60L1C0:0.00477,SARAE10_Y:-0.00035):0.00086):0.00092,SARAE03_Y:0.00126):0.00125,SARAC11_Y:0.00318):0.00160,YRM60L1H0:0.00593):0.09975):0.07088,SARAA01_Y:0.02880):0.00190,SARAB04_Y:0.05219):0.00563,YRM60L1E0:0.06099):0.00165,(YRM60L1H0:0.00450,SARAF11_Y:0.01839):0.00288):0.00129,YRM60L1B1:0.00713):0.00194,(YRM60L1G0:0.00990,(YA165L1G0:0.00576,(YA160L1G0:0.01226,SARAA11_Y:0.00389):0.00088):0.00300):0.00614,SARAC06_Y:0.00381);
end;""".split('\n')
 
Nexus_tree_3 = """#NEXUS 
 
Begin trees;  [Treefile saved Wednesday, May 5, 2004  5:02 PM]
[!
>Data file = Grassland_short.nex
>Neighbor-joining search settings:
>  Ties (if encountered) will be broken systematically
>  Distance measure = Jukes-Cantor
>  (Tree is unrooted)
]
	Translate
		1 outgroup25,
		2 AF078391l,
		3 'AF078211af',
		4 AF078393l,
		5 AF078187af,
		6 AF078320l,
		7 AF078432l,
		8 AF078290af,
		9 AF078350l,
		10 AF078356l,
		11 AF078306af,
		12 AF078429l,
		13 AF078256af,
		14 'AF078443l',
		15 AF078450l,
		16 AF078452l,
		17 AF078258af,
		18 'AF078380l',
		19 AF078251af,
		20 AF078179af,
		21 outgroup258
		;
tree PAUP_1 = [&R] (1,(2,(((3,4),(5,(((((6,10),9),(11,18)),((((7,15),19),17),(8,(12,(14,16))))),13))),20)),21);
tree PAUP_2 = [&R] (1,(2,(((3,4),(5,(((((6,10),9),(11,18)),((((7,15),19),17),(8,(12,(14,16))))),13))),20)),21);
End;""".split('\n')
 
PAUP_log = """
P A U P *
Version 4.0b10 for Macintosh (PPC/Altivec)
Wednesday, May 5, 2004  5:03 PM
 
This copy registered to: Scott Dawson
                         UC-Berkeley
                         (serial number = B400784)
 
      -----------------------------NOTICE-----------------------------
        This is a beta-test version.  Please report any crashes,
        apparent calculation errors, or other anomalous results.
        There are no restrictions on publication of results obtained
        with this version, but you should check the WWW site
        frequently for bug announcements and/or updated versions.  
        See the README file on the distribution media for details.
      ----------------------------------------------------------------
 
Tree description:
 
  Optimality criterion = parsimony
    Character-status summary:
      Of 500 total characters:
        All characters are of type 'unord'
        All characters have equal weight
        253 characters are constant
        109 variable characters are parsimony-uninformative
        Number of parsimony-informative characters = 138
    Multistate taxa interpreted as uncertainty
    Character-state optimization: Accelerated transformation (ACCTRAN)
                                  AncStates = "standard"
 
Tree number 1 (rooted using user-specified outgroup)
 
Branch lengths and linkages for tree #1
 
                                     Assigned       Minimum       Maximum
                    Connected          branch      possible      possible
   Node              to node           length        length        length
-------------------------------------------------------------------------
    40                root                  0             0             0
outgroup25 (1)*         40                 40            24            52
    39                  40                 57            15            72
AF078391l (2)           39                 56            48            81
    38                  39                 33            17            71
    37                  38                 31            14            48
    22                  37                 20            11            33
AF078211af (3)          22                  4             2             7
AF078393l (4)           22                  1             0             3
    36                  37                 14             5            32
AF078187af (5)          36                 18            10            28
    35                  36                 21            16            45
    34                  35                 10             3            23
    26                  34                  5             3             9
    24                  26                  4             3            13
    23                  24                  0             0             3
AF078320l (6)           23                  1             1             3
AF078356l (10)          23                  2             2             2
AF078350l (9)           24                  5             3             5
    25                  26                  9             2            10
AF078306af (11)         25                  6             4            10
AF078380l (18)          25                  5             3            10
    33                  34                  5             4            15
    29                  33                  3             1             4
    28                  29                  2             2             2
    27                  28                  3             1             3
AF078432l (7)           27                  2             2             2
AF078450l (15)          27                  3             3             4
AF078251af (19)         28                  6             6             7
AF078258af (17)         29                  6             6             6
    32                  33                  4             3            15
AF078290af (8)          32                  9             8            11
    31                  32                  9             6            18
AF078429l (12)          31                  2             1             5
    30                  31                 10             9            18
AF078443l (14)          30                  2             1             6
AF078452l (16)          30                  4             4             5
AF078256af (13)         35                  4             1             6
AF078179af (20)         38                 48            34            79
outgroup258 (21)*       40                 45            27            67
-------------------------------------------------------------------------
Sum                                       509
 
Tree length = 509
Consistency index (CI) = 0.7151
Homoplasy index (HI) = 0.2849
""".split('\n')
 
# Fixtures: single rows in the layout of the PAUP_log branch-length table,
# used as direct input to find_fields and parse_taxa. They mirror the first
# four table rows: two internal nodes (40, 39) and two named taxa (1, 2).
# Note that line1's last column is spaced more tightly than the matching row
# inside PAUP_log.
line1 = "    40                root                  0             0           0"
line2 = "outgroup25 (1)*         40                 40            24            52"
line3 = "    39                  40                 57            15            72"
line4 = "AF078391l (2)           39                 56            48            81"
 
 
 
class NexusParserTests(TestCase):
    """Checks the Nexus tree-block and PAUP-log parsing helpers."""

    # Topology string shared by the PAUP trees in the translated fixtures.
    _PAUP_DND = "(1,(2,(((3,4),(5,(((((6,10),9),(11,18)),((((7,15),19),17),(8,(12,(14,16))))),13))),20)),21);"

    # The PAUP_1 tree exactly as it is written in the Nexus file.
    _PAUP_1_LINE = "tree PAUP_1 = [&R] " + _PAUP_DND

    # First line of the trees block in the translated fixtures.
    _TREES_HEADER = \
        "Begin trees;  [Treefile saved Wednesday, May 5, 2004  5:02 PM]"

    # (number, name) pairs spot-checked in every translation table, kept in
    # the order in which they are asserted.
    _EXPECTED_TAXA = (
        ('1', 'outgroup25'),
        ('2', 'AF078391l'),
        ('3', 'AF078211af'),
        ('4', 'AF078393l'),
        ('5', 'AF078187af'),
        ('6', 'AF078320l'),
        ('21', 'outgroup258'),
        ('20', 'AF078179af'),
        ('19', 'AF078251af'),
    )

    def _check_taxa(self, translation):
        """Assert that translation maps the sampled numbers to their names."""
        for number, name in self._EXPECTED_TAXA:
            self.assertEqual(translation[number], name)

    def test_parse_nexus_tree(self):
        """parse_nexus_tree gives a translation table and dnd strings by name"""
        translation, trees = parse_nexus_tree(Nexus_tree)

        # the complete dendrogram string must come back unchanged
        self.assertEqual(trees['tree PAUP_1'], self._PAUP_DND)

        # the sampled taxa must all be present in the translation table
        self._check_taxa(translation)

        # a Nexus file lacking a translation table must still be parsed
        translation, trees = parse_nexus_tree(Nexus_tree_2)
        self.assertEqual(translation, None)
        self.assertEqual(trees['tree nj'], '((((((((((YA10260L1:0.01855,SARAG06_Y:0.00367):0.01965,(((YA270L1G0:0.01095,SARAD10_Y:0.00699):0.01744,YA270L1A0:0.04329):0.00028,((YA165L1C1:0.01241,SARAA02_Y:0.02584):0.00213,((YA165L1H0:0.00092,SARAF10_Y:-0.00092):0.00250,(YA165L1A0:0.00177,SARAH10_Y:0.01226):0.00198):0.00131):0.00700):0.01111):0.11201,(YA160L1F0:0.00348,SARAG01_Y:-0.00122):0.13620):0.01202,((((YRM60L1D0:0.00357,(YRM60L1C0:0.00477,SARAE10_Y:-0.00035):0.00086):0.00092,SARAE03_Y:0.00126):0.00125,SARAC11_Y:0.00318):0.00160,YRM60L1H0:0.00593):0.09975):0.07088,SARAA01_Y:0.02880):0.00190,SARAB04_Y:0.05219):0.00563,YRM60L1E0:0.06099):0.00165,(YRM60L1H0:0.00450,SARAF11_Y:0.01839):0.00288):0.00129,YRM60L1B1:0.00713):0.00194,(YRM60L1G0:0.00990,(YA165L1G0:0.00576,(YA160L1G0:0.01226,SARAA11_Y:0.00389):0.00088):0.00300):0.00614,SARAC06_Y:0.00381);')

    def test_parse_nexus_tree_sq(self):
        """single quotes are stripped from tree and translation table names"""
        translation, trees = parse_nexus_tree(Nexus_tree_3)

        # the complete dendrogram string must come back unchanged
        self.assertEqual(trees['tree PAUP_1'], self._PAUP_DND)

        # the sampled taxa (entry 3 is quoted in the input) must match
        self._check_taxa(translation)

    def test_get_tree_info(self):
        """get_tree_info extracts the tree-describing part of a Nexus file"""
        tree_lines = get_tree_info(Nexus_tree)
        self.assertEqual(len(tree_lines), 33)
        self.assertEqual(tree_lines[0], self._TREES_HEADER)
        self.assertEqual(tree_lines[31], self._PAUP_1_LINE)

    def test_split_tree_info(self):
        """split_tree_info separates header, translation table and dnd lines"""
        header, trans_lines, dnd_lines = \
            split_tree_info(get_tree_info(Nexus_tree))

        # section sizes
        self.assertEqual(len(header), 9)
        self.assertEqual(len(trans_lines), 22)
        self.assertEqual(len(dnd_lines), 2)

        # section boundaries
        self.assertEqual(header[0], self._TREES_HEADER)
        self.assertEqual(header[8], "\tTranslate")
        self.assertEqual(trans_lines[0], "\t\t1 outgroup25,")
        self.assertEqual(trans_lines[21], "\t\t;")
        self.assertEqual(dnd_lines[0], self._PAUP_1_LINE)

    def test_parse_trans_table(self):
        """parse_trans_table builds a dict of taxon names keyed by number"""
        sections = split_tree_info(get_tree_info(Nexus_tree))
        translation = parse_trans_table(sections[1])

        self.assertEqual(len(translation), 21)

        # the sampled taxa must all be present in the translation table
        self._check_taxa(translation)

    def test_parse_dnd(self):
        """parse_dnd builds a dict of dnd strings keyed by tree name"""
        sections = split_tree_info(get_tree_info(Nexus_tree))
        trees = parse_dnd(sections[2])
        self.assertEqual(trees['tree PAUP_1'], self._PAUP_DND)

    # ---- PAUP log helpers ---------------------------------

    def test_get_BL_table(self):
        """get_BL_table extracts the branch-length table rows from the log"""
        rows = get_BL_table(PAUP_log)
        self.assertEqual(len(rows), 40)
        first_row = \
            "    40                root                  0             0             0"
        last_row = \
            "outgroup258 (21)*       40                 45            27            67"
        self.assertEqual(rows[0], first_row)
        self.assertEqual(rows[39], last_row)

    def test_find_fields(self):
        """find_fields maps field names to the values of one BL table line"""
        fields = find_fields(line1)
        self.assertEqual(fields['taxa'], "40")
        self.assertEqual(fields['bl'], "0")
        self.assertEqual(fields['parent'], "root")

    def test_parse_taxa(self):
        """parse_taxa pulls the taxon number out of a find_fields taxa field"""
        # split every sample line first, then check the parsed numbers
        parsed = [find_fields(row) for row in (line1, line2, line3, line4)]
        for fields, number in zip(parsed, ('40', '1', '39', '2')):
            self.assertEqual(parse_taxa(fields["taxa"]), number)

    def test_parse_PAUP_log(self):
        """parse_PAUP_log collects branch length info from a PAUP log file"""
        branch_info = parse_PAUP_log(PAUP_log)
        self.assertEqual(len(branch_info), 40)

        # node number -> (parent, branch length), in the asserted order
        expected = (
            ('1', ('40', 40)),
            ('40', ('root', 0)),
            ('39', ('40', 57)),
            ('2', ('39', 56)),
            ('26', ('34', 5)),
            ('21', ('40', 45)),
        )
        for node, linkage in expected:
            self.assertEqual(branch_info[node], linkage)
 
 
 
# Script entry point: when this file is executed directly (rather than
# imported), hand control to the `main` runner imported from
# cogent.util.unit_test.
if __name__ == '__main__':
    main()