#!/usr/bin/env python
""" Provides tests for EbiParser and related classes and functions.
from cogent.parse.ebi import cc_alternative_products_parser, \
    cc_basic_itemparser, cc_biophysicochemical_properties_parser, \
    cc_interaction_parser, cc_itemfinder, cc_parser, EbiFinder, \
    MinimalEbiParser, hanging_paragraph_finder, join_parser, \
    join_split_dict_parser, join_split_parser, labeloff, linecode_maker, \
    linecode_merging_maker, mapping_parser, pairs_to_dict, period_tail_finder, \
    rstrip_, ft_basic_itemparser, ft_id_parser, ft_mutagen_parser, \
    ft_mutation_parser, ft_parser, try_int, ra_parser, rc_parser, \
    rg_parser, rl_parser, rn_parser, rp_parser, rt_parser, rx_parser, \
    gn_parser, single_ref_parser, ac_parser, de_itemparser, dr_parser, \
    dt_parser, id_parser, oc_parser, og_parser, os_parser, ox_parser, \
    sq_parser, de_parser, RecordError, FieldError, curry, required_labels, \
from cogent.util.unit_test import TestCase, main
from cogent.core.sequence import Sequence
from cogent.core.info import Info
__author__ = "Zongzhi Liu"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Zongzhi Liu", "Rob Knight"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Zongzhi Liu"
__email__ = "zongzhi.liu@gmail.com"
__status__ = "Development"
def item_empty_filter(d):
    """Return a new dict holding only the items of ``d`` with a truthy value.

    Items whose value is falsy (0, '', [], {}, False, None, ...) are left
    out.  ``d`` itself is not modified.
    """
    # dict.items() exists on both Python 2 and Python 3; iteritems() is
    # Python 2 only.
    return dict((key, value) for key, value in d.items() if value)
class EbiTests(TestCase):
    """ Tests ebi parsers and generic parsers and general functions """
    def setUp(self):
        """ Construct some fake data for testing purposes """
        # NOTE(review): the body is empty, so no per-test data is built here;
        # the tests in this class read module-level data instead.
    def test_item_empty_filter(self):
        """item_empty_filter: known values"""
        inputs = [
            {1:0, 2:1, 3:'', 4:[], 5:False, 6:{}}]
        # Key 2 is the only one mapped to a truthy value, so it alone is kept.
        expects = [
            {2: 1}]
        observed = [item_empty_filter(d) for d in inputs]
        self.assertEqual(observed, expects)
    def test_rstrip_(self):
        """rstrip_ should build a callable equivalent to str.rstrip(chars)"""
        padded = ' aaa;  '
        strip_tail = rstrip_('; ')
        self.assertEqual(strip_tail(padded), padded.rstrip('; '))
        # NOTE(review): the "test default" case (rstrip_ called without
        # arguments) is announced in the original but not exercised here.
    def test_hanging_paragraph_finder(self):
        """hanging_paragraph_finder should give expected results"""
        f = hanging_paragraph_finder
        # NOTE(review): the list below is never closed and the expected-value
        # line that slices it has no opening assert call, so lines appear to
        # be missing from this method.  The '#3' marker implies '-bbb:' should
        # sit at index 3, i.e. at least one leading entry is absent.  Restore
        # the fixture from version control before relying on this test.
        test = [ 
            '  content',
            '    content',
            '-bbb:',  #3
            '  bbb',
                [test[0:3], test[3:-1], test[-1:]])
        #test all indent lines
        all_indent = ['  aa', '  bb']
        self.assertEqual(list(f(all_indent)), [all_indent])
        #test empty lines
        self.assertEqual(list(f(['',' '])), [])
    def test_period_tail_finder(self):
        """period_tail_finder should yield each group of expected lines."""
        test = "a\naa.\nb\nbb.".splitlines()
        # Without an assertion this test could never fail.
        # NOTE(review): the expected grouping (each group closed by the line
        # ending in a period) is inferred from the docstring and the fixture;
        # confirm against period_tail_finder's implementation.
        self.assertEqual(list(period_tail_finder(test)),
                [['a', 'aa.'], ['b', 'bb.']])
    def test_EbiFinder(self):
        """EbiFinder should return expected list"""
        test = "a\n//\nb\n//".splitlines()
        # NOTE(review): expected grouping inferred from the '//' terminators
        # in the fixture; confirm against EbiFinder's implementation.
        self.assertEqual(list(EbiFinder(test)),
                [['a', '//'], ['b', '//']])
        # A trailing line with no closing '//' must be rejected.  list() is
        # passed as the callable so the finder's output is actually consumed.
        # NOTE(review): RecordError is assumed to be the exception type; the
        # opening of this call was missing from the file.
        test_fail = test + ['c']
        self.assertRaises(RecordError,
                list, EbiFinder(test_fail))
    def test_pairs_to_dict(self):
        """pairs_to_dict should return the expected dict"""
        sorted_items = [('a', 1), ('b', 2), ('b', 3),]
        add_one = lambda x: x + 1
        double = lambda x: x*2
        set_zero = lambda x: 0
        handlers ={ 'a': add_one, 'b': double,}
        #test default all (restored call: same result as 'overwrite_value')
        self.assertEqual(pairs_to_dict(sorted_items),
                {'a': 1, 'b': 3})
        #test 'overwrite_value'
        self.assertEqual(pairs_to_dict(sorted_items, 'overwrite_value'),
                {'a': 1, 'b': 3})
        #test no_duplicated_key, raise
        self.assertRaises(ValueError, pairs_to_dict,
                sorted_items, 'no_duplicated_key')
        #test always_multi_value
        self.assertEqual(pairs_to_dict(sorted_items, 'always_multi_value'),
                {'a': [1], 'b': [2, 3]})
        #test allow multi_value
        self.assertEqual(pairs_to_dict(sorted_items, 'allow_multi_value'),
                {'a': 1, 'b': [2, 3]})
        #test raise error when  key not found in all_keys
        f = curry(pairs_to_dict, all_keys=['a','c'])
        self.assertRaises(ValueError, f, sorted_items)
        #test handlers
        sorted_items.append(('c', 4))
        self.assertEqual(pairs_to_dict(sorted_items, handlers=handlers,
            default_handler=set_zero), {'a': 2, 'b': 6, 'c': 0})
        #test raise error when  no valid handlers were found
        f = curry(pairs_to_dict, handlers=handlers)
        self.assertRaises(ValueError, f, sorted_items)
        #test sanity
        # NOTE(review): test_dict is built but never asserted against; the
        # assertion that used it appears to be missing.
        test_dict = dict(sorted_items)
    def test_linecode_maker(self):
        """linecode_maker: each line maps to a (linecode, line) tuple"""
        lines = ['AA   aa.', 'BB    bb.', 'CC C   cc.', 'DD dd.']
        linecodes = ['AA', 'BB', 'CC C', 'DD dd.']
        observed = [linecode_maker(line) for line in lines]
        self.assertEqual(observed, zip(linecodes, lines))
    def test_labeloff(self):
        """labeloff: should return expected lines"""
        # NOTE(review): the last two inputs were missing from the file (the
        # list was unclosed and held four items against six expected values).
        # They are filled in assuming labeloff drops the first five characters
        # of each line, as the commented-out expression below suggests; any
        # line of at most five characters then gives ''.
        tests = ['AA   aa.',
                 'BB    bb.',
                 'CC C   cc.',
                 'DD dd.',
                 'EE',
                 '']
        #expects = [line[5:] for line in tests]
        expects = ['aa.', ' bb.', '  cc.', '.','','']
        self.assertEqual(labeloff(tests), expects)
    def test_join_parser(self):
        """join parser should return expected str."""
        test_list = '"aaa\nbbb \nccc"; \n'.splitlines()
        test_str = 'aaa bb. '
        #test default, list input (call header restored)
        self.assertEqual(join_parser(test_list),
                '"aaa bbb  ccc"')
        #test default, str input (call header restored)
        self.assertEqual(join_parser(test_str),
                'aaa bb')
        #test no strip
        self.assertEqual(join_parser(test_list, chars_to_strip=''),
                '"aaa bbb  ccc"; ')
        #test strip
        self.assertEqual(join_parser(test_list, chars_to_strip='"; '),
                'aaa bbb  ccc')
        #test empty
        self.assertEqual(join_parser(['', ' ']),'')
    def test_join_split_parser(self):
        """join_split_parser: should return expected"""
        f = join_split_parser
        assertEqual = self.assertEqual
        assertEqual(f(['aa; bb;', 'cc.']),
                ['aa', 'bb', 'cc'])
        # A second delimiter splits an item further into a nested list.
        assertEqual(f(['aa; bb, bbb;', 'cc.'],delimiters=';,'),
                ['aa', ['bb','bbb'], 'cc'])
        #test item_modifer
        # NOTE(review): both calls below end with a trailing comma and no
        # expected value or closing parenthesis -- the expected lists appear
        # to be missing from the file and must be restored.
        assertEqual(f('aa (bb) (cc).', '(', item_modifier=rstrip_(') ')),
        assertEqual(f('aa (bb)xx (cc).', '(', item_modifier=rstrip_(') ')),
        #test empty
        assertEqual(f(['', ' ']),[''])
    def test_join_split_dict_parser(self):
        """join_split_dict_parser: known dicts, raw splits, and ValueError"""
        parse = join_split_dict_parser
        check = self.assertEqual
        #test default
        check(parse('aa=1; bb=2,3; cc=4 (if aa=1);'),
            {'aa':'1', 'bb': ['2','3'], 'cc': '4 (if aa=1)'})
        check(parse('aa=1; bb=2,3; cc=4:5', delimiters=';=,:'),
            {'aa':'1', 'bb': ['2','3'], 'cc': '4:5'})
        #test strict=False -> splits without dict()
        check(parse('aa=1; bb.', strict=False), [['aa', '1'], ['bb']])
        #test strict -> raise ValueError
        malformed = [
            ('aa=1; bb.',),
            ('aa=1; bb=2=3.', ';='),
            ('',)]
        for call_args in malformed:
            self.assertRaises(ValueError, parse, *call_args)
    def test_mapping_parser(self):
        """mapping_parser: known line/fields combinations give known dicts"""
        field_spec = [None, 'A', 'B', ('C', int), ('D', float)]
        text = 'blah aa bb; 2 3.1;'
        wanted = {'A': 'aa', 'B': 'bb', 'C': 2, 'D': 3.1}
        check = self.assertEqual
        check(mapping_parser(text, field_spec), wanted)
        #test more splits -> ignore the last splits
        check(mapping_parser(text + 'blah blah', field_spec), wanted)
        #test more fields -> truncate the last fields
        check(mapping_parser(text, field_spec + ['E']), wanted)
        #test empty
        check(mapping_parser('', field_spec), {})
    def test_linecode_merging_maker(self):
        """linecode_merging_maker: each line maps to a (label, line) tuple"""
        raw_lines =['ID   id.', 'RN   rn.', 'RR   invalid', 'RN  rn.']
        wanted_labels = ['ID', 'REF', 'RR', 'RN  rn.']
        observed = [linecode_merging_maker(line) for line in raw_lines]
        self.assertEqual(observed, zip(wanted_labels, raw_lines))
    def test_parse_header(lines):
        """parse_header: placeholder with no assertions.

        NOTE(review): the only parameter is named ``lines`` but, as a
        TestCase method, it receives the test instance.  The name is kept so
        the signature is unchanged.
        """
        pass
    def test_MinimalEbiParser_valid(self):
        """MinimalEbiParser: integrity of output """
        # Non-strict parser; individual calls below override strict as needed.
        f = curry(MinimalEbiParser, strict=False)
        #test valid result: sequence, number of records, keys of a header 
        valid_result = list(f(fake_records_valid))
        self.assertEqual(len(valid_result), 2)
        sequence, header = valid_result[0]
        self.assertEqual(sequence, 'aaccppgghhh')
        #the first fake record use only the required labels, the header is
        #deleted of '', which was assigned to sequence
        # NOTE(review): the next line is the tail of an assertion whose
        # opening call is missing from the file.
                list(sorted(required_labels))[1:]) #[1:] to exclude the '' 
        #test selected_labels
        selected_labels = ['ID', 'DE']
        # NOTE(review): this call is never closed; its remaining arguments
        # (and any assertion on select_result) appear to be missing.
        select_result = list(f(fake_records_valid, 
        #test bad record - unknown linecode or wrong line format
        self.assertRaises(ValueError, list, 
                f(fake_records_valid + ['ID   id.', 'RR   not valid.','//']))
        self.assertRaises(ValueError, list, 
                f(fake_records_valid + ['ID   id.', ' RN   bad format.','//']))
        self.assertRaises(ValueError, list, 
                f(fake_records_valid + ['ID   id.', 'RN  bad format.','//']))
        #test bad record - not end with '//'
        self.assertRaises(RecordError, list, f(fake_records_valid +
                ['ID   not end with //', '   seq']))
        #test strict: lacking required linecodes
        #?? How to test the error message?  warn message?
        #the first record, [:-5], is valid even when strict=True.
        # NOTE(review): the_first_valid is assigned but never asserted on.
        the_first_valid = list(f(fake_records_valid[:-5], strict=True))[0]
        #[1] get the header_dict
        self.assertRaises(RecordError, list, 
                f(fake_records_valid, strict=True))
    def test_EbiParser(self):
        """EbiParser: the two fake records give two results when not strict"""
        parse = curry(EbiParser, strict=False)
        first_valid = fake_records_valid[:-5]
        #test valid
        parsed = list(parse(fake_records_valid))
        self.assertEqual(len(parsed), 2)
        #test skipping bad record which strict=False
        #self.assertEqual(len(list(f(fake_records_valid[:-1] +
        #    ['OX   xx=no equal.', '//']))), 1)
        ##test Raise RecordError from parse_head when strict=True
        #self.assertRaises(RecordError, list, f(first_valid[:-1] +
        #    ['OX   xx=no equal.', '//'], strict=True))
class RootParsersKnownValues(TestCase):
    """Test most xx_parsers with known value"""
    def test_id_parser(self):
        """id_parser: a known ID line gives the known dict"""
        wanted = {
            'EntryName': 'CYC_BOVIN',
            'DataClass': 'STANDARD',
            'MolType': 'PRT',
            'Length': 104}
        observed = id_parser(
            ['ID   CYC_BOVIN      STANDARD;      PRT;   104 AA.'])
        self.assertEqual(observed, wanted)
    def test_sq_parser(self):
        """sq_parser: a known SQ line gives the known dict"""
        wanted = {
            'Length': 486,
            'MolWeight': 55639,
            'Crc64': 'D7862E867AD74383'}
        observed = sq_parser(
            ["SQ   SEQUENCE   486 AA;  55639 MW;  D7862E867AD74383 CRC64;"])
        self.assertEqual(observed, wanted)
    def test_ac_parser(self):
        """ac_parser: two AC lines give one flat list of accessions"""
        ac_lines = ["AC   Q16653; O00713; O00714;", "AC   Q92892; Q92893;"]
        wanted = ['Q16653', 'O00713', 'O00714', 'Q92892', 'Q92893']
        self.assertEqual(ac_parser(ac_lines), wanted)
    def test_oc_parser(self):
        """oc_parser should return expected list"""
        lines = [
            "OC   Eukaryota; Metazoa; Chordata;",
            "OC   Mammalia;"]
        # The opening of this assertion was missing; the parser under test is
        # the one named by this method.
        self.assertEqual(oc_parser(lines),
                ['Eukaryota', 'Metazoa', 'Chordata', 'Mammalia'])
    def test_dt_parser(self):
        """dt_parser: each DT line gives one string in the result list"""
        dates = [
            '01-AUG-1988 (Rel. 08, Created)',
            '30-MAY-2000 (Rel. 39, Last sequence update)',
            '10-MAY-2005 (Rel. 47, Last annotation update)']
        dt_lines = [
            "DT   01-AUG-1988 (Rel. 08, Created)",
            "DT   30-MAY-2000 (Rel. 39, Last sequence update)",
            "DT   10-MAY-2005 (Rel. 47, Last annotation update)"]
        self.assertEqual(dt_parser(dt_lines), dates)
    def test_de_itemparser(self):
        """de_itemparser: known values"""
        # NOTE(review): the last two inputs were missing from the file (the
        # list was unclosed and held two items against four expected dicts).
        # 'AAA' and '' are inferred from the third and fourth expected dicts;
        # confirm against the original fixture.
        inputs = [
            ' AAA (aa) ',
            'AAA [xx] (aa)',
            'AAA',
            '']
        expects = [
            {'OfficalName': 'AAA', 'Synonyms': ['aa']},
            {'OfficalName': 'AAA [xx]', 'Synonyms': ['aa']},
            {'OfficalName': 'AAA', 'Synonyms': []},
            {'OfficalName': '', 'Synonyms': []}]
        #pprint(map(de_itemparser, inputs))
        observed = [de_itemparser(item) for item in inputs]
        self.assertEqual(observed, expects)
    def test_de_parser(self):
        """de_parser: number of non-empty fields for known DE lines"""
        de_lines = [
            "DE   Annexin [Includes: CCC] [Contains: AAA] (Fragment).",
            "DE   A [Includes: II] (Fragment).",
            "DE   A [Contains: CC].",
            "DE   A (Fragment).",
            "DE   A (aa)."]
        nonempty_counts = []
        for de_line in de_lines:
            # Each line is parsed on its own, then empty fields are dropped.
            kept = item_empty_filter(de_parser([de_line]))
            nonempty_counts.append(len(kept))
        self.assertEqual(nonempty_counts, [4, 3, 2, 2, 2])
    def test_os_parser(self):
        """os_parser should return expected list"""
        # The openings of both assertions were missing; the parser under test
        # is the one named by this method.
        lines = [
            'OS   Solanum melongena (Eggplant) (Auber-',
            'OS   gine).']
        self.assertEqual(os_parser(lines),
                ['Solanum melongena', 'Eggplant', 'Auber- gine'])
        lines = \
            """OS   Escherichia coli.""".splitlines()
        self.assertEqual(os_parser(lines),
                ['Escherichia coli'])
    def test_og_parser(self):
        """og_parser should return expected list"""
        lines = [
            "OG   XXX; xx.",
            "OG   Plasmid R6-5, Plasmid IncFII R100 (NR1), and",
            "OG   Plasmid IncFII R1-19 (R1 drd-19)."]
        # The opening of this assertion was missing; the parser under test is
        # the one named by this method.
        self.assertEqual(og_parser(lines),
            ['XXX; xx', [
            'Plasmid R6-5', 'Plasmid IncFII R100 (NR1)',
            'Plasmid IncFII R1-19 (R1 drd-19)']])
    def test_ox_parser(self):
        """ox_parser should return expected dict"""
        lines = ["OX   NCBI_TaxID=9606;"]
        # The opening of this assertion was missing; the parser under test is
        # the one named by this method.
        self.assertEqual(ox_parser(lines),
                {'NCBI_TaxID': '9606'})
    def test_gn_parser(self):
        """gn_parser should return expected list of dict"""
        # The openings of both assertions were missing; the parser under test
        # is the one named by this method.
        lines = [
            "GN   Name=hns; Synonyms=bglY, cur, topS;",
            "GN   OrderedLocusNames=b1237, c1701, ECs1739;"]
        self.assertEqual(gn_parser(lines),
            [{'Synonyms': ['bglY', 'cur', 'topS'],
            'OrderedLocusNames': ['b1237', 'c1701', 'ECs1739'],
            'Name': 'hns'}])
        # Two genes separated by an "and" line give two dicts.
        lines = [
            "GN   Name=Jon99Cii; Synonyms=SER1, Ser99Da; ORFNames=CG7877;",
            "GN   and",
            "GN   Name=Jon99Ciii; Synonyms=SER2, Ser99Db; ORFNames=CG15519;"]
        self.assertEqual(gn_parser(lines),
            [{'ORFNames': 'CG7877', 'Synonyms': ['SER1', 'Ser99Da'],
            'Name': 'Jon99Cii'},
            {'ORFNames': 'CG15519', 'Synonyms': ['SER2', 'Ser99Db'],
            'Name': 'Jon99Ciii'}])
    def test_dr_parser(self):
        """dr_parser: module-level dr_lines parse to dr_expect"""
        observed = dr_parser(dr_lines)
        self.assertEqual(observed, dr_expect)
class FT_Tests(TestCase):
    """Tests for FT parsers. """
    def test_ft_basic_itemparser(self):
        """ft_basic_itemparser: known values"""
        # (input lines, expected tuple) pairs.
        cases = [
            (['DNA_BIND    >102    292'],
                ('DNA_BIND', '>102', 292, '')),
            (['CONFLICT    327    327       E -> R (in Ref. 2).'],
                ('CONFLICT', 327, 327, 'E -> R (in Ref. 2)')),
            (['PROPEP      ?25     48',
              '                             /FTId=PRO_021449.'],
                ('PROPEP', '?25', 48, '/FTId=PRO_021449')),
            (['VARIANT     214    214       V -> I.',
              '                             /FTId=VAR_009122.'],
                ('VARIANT', 214, 214, 'V -> I. /FTId=VAR_009122')),
        ]
        observed = [ft_basic_itemparser(item) for item, _ in cases]
        wanted = [result for _, result in cases]
        self.assertEqual(observed, wanted)
    def test_try_int(self):
        """try_int: known values"""
        # (input, expected) pairs: '9', '0' and '-3' are expected back as
        # ints, every other string is expected back unchanged.
        cases = [
            ('9', 9), ('0', 0), ('-3', -3),
            ('2.3', '2.3'), ('<9', '<9'), ('>9', '>9'),
            ('?', '?'), ('?35', '?35'), ('', '')]
        observed = [try_int(text) for text, _ in cases]
        wanted = [value for _, value in cases]
        self.assertEqual(observed, wanted)
    def test_ft_id_parser(self):
        """ft_id_parser: known values"""
        # NOTE(review): only the last two inputs were present in the file,
        # against five expected dicts.  The first three are inferred from the
        # corresponding expected dicts; confirm against the original fixture.
        inputs = [
             '',
             'ddd',
             '/FTId=PRO_021449',
             'V -> I. /FTId=VAR_009122',
             'E -> R (tumor). /FTId=VAR_002343',]
        expects = [
            {'Description': '', 'Id': ''},
            {'Description': 'ddd', 'Id': ''},
            {'Description': '', 'Id': 'PRO_021449'},
            {'Description': 'V -> I', 'Id': 'VAR_009122'},
            {'Description': 'E -> R (tumor)', 'Id': 'VAR_002343'}]
        #pprint(map(ft_id_parser, inputs))
        observed = [ft_id_parser(item) for item in inputs]
        self.assertEqual(observed, expects)
    def test_ft_mutation_parser(self):
        """ft_mutation_parser: known values"""
        # NOTE(review): the file held five inputs against six expected dicts.
        # The empty string is restored as the first input because the first
        # expected dict has every field empty; confirm against the original.
        inputs = [
             '',
             'ddd',  #should raise error?
             'V -> I. /FTId=xxxxxx',  #should raise error?
             'V -> I',
             'E -> R (tumor)',
             'missing (tumor)', ]
        expects = [
            {'MutateFrom': '', 'Comment': '', 'MutateTo': ''},
            {'MutateFrom': 'ddd', 'Comment': '', 'MutateTo': ''},
            {'MutateFrom': 'V', 'Comment': '', 'MutateTo': 'I. /FTId=xxxxxx'},
            {'MutateFrom': 'V', 'Comment': '', 'MutateTo': 'I'},
            {'MutateFrom': 'E', 'Comment': 'tumor', 'MutateTo': 'R'},
            {'MutateFrom': 'missing ', 'Comment': 'tumor', 'MutateTo': ''}]
        #pprint(map(ft_mutation_parser, inputs))
        observed = [ft_mutation_parser(item) for item in inputs]
        self.assertEqual(observed, expects)
    def test_ft_mutation_parser_raise(self):
        """ft_mutation_parser: raise ValueError"""
        # NOTE(review): nothing follows the docstring, so this test passes
        # without checking that ValueError is raised.
    def test_ft_mutagen_parser(self):
        """ft_mutagen_parser: known values"""
        # (description text, expected dict) pairs.
        cases = [
            ('C->R,E,A: Loss of cADPr hydrolas',
                {'MutateFrom': 'C', 'MutateTo': 'R,E,A',
                 'Comment': ' Loss of cADPr hydrolas'}),
            ('Missing: Abolishes ATP-binding',
                {'MutateFrom': 'Missing', 'MutateTo': '',
                 'Comment': ' Abolishes ATP-binding'}),
        ]
        observed = [ft_mutagen_parser(text) for text, _ in cases]
        wanted = [parsed for _, parsed in cases]
        self.assertEqual(observed, wanted)
    def test_ft_id_mutation_parser(self):
        """ft_id_mutation_parser: known values"""
        # NOTE(review): nothing follows the docstring, so no known values are
        # actually checked here.
    def test_ft_parser(self):
        """ft_parser: module-level ft_lines parse to ft_expect"""
        observed = ft_parser(ft_lines)
        self.assertEqual(observed, ft_expect)
class CC_Tests(TestCase):
    """tests for cc_parsers. """
    def test_cc_itemfinder_valid(self):
        """cc_itemfinder: yield each expected block."""
        # The openings of both assertions were missing; they are restored to
        # match the three closing parentheses left on each tail line.
        input_with_license = labeloff(cc_lines)
        self.assertEqual(len(list(cc_itemfinder(
            input_with_license))), 9)
        # Dropping the last four lines of cc_lines gives one block fewer.
        input_without_license = labeloff(cc_lines[:-4])
        self.assertEqual(len(list(cc_itemfinder(
            input_without_license))), 8)
    def test_cc_itemfinder_raise(self):
        """cc_itemfinder: raise FieldError if license block bad."""
        # Dropping the final line of cc_lines leaves the license block
        # without its bottom line.
        input_with_license_lacking_bottom = labeloff(cc_lines[:-1])
        # The argument of this call was missing from the file.
        self.assertRaises(FieldError, cc_itemfinder,
                input_with_license_lacking_bottom)
    def test_cc_basic_itemparser(self):
        """cc_basic_itemparser: known results or FieldError"""
        valid_topics = [
            ['-!- topic1: first: line', '    second line'],
            ['-!- topic2: ', '    first line', '   second line'],
            [' topic3: not treated invalid topic format'],]
        # topic2's last line is indented by three spaces instead of four, and
        # the expected content is correspondingly 'econd line'.
        expects = [
            ('topic1', ['first: line', 'second line']),
            ('topic2', ['first line', 'econd line']),
            ('topic3', ['not treated invalid topic format']),]
        # The second argument of this assertion was missing from the file.
        observed = [cc_basic_itemparser(topic) for topic in valid_topics]
        self.assertEqual(observed, expects)
        bad_topic = ['-!- bad_topic without colon', '    FieldError']
        self.assertRaises(FieldError, cc_basic_itemparser, bad_topic)
    def test_cc_interaction_parser(self):
        """cc_interaction_parser: known values"""
        interaction_lines = [
            'Self; NbExp=1; IntAct=EBI-123485, EBI-123485;',
            'Q9W158:CG4612; NbExp=1; IntAct=EBI-123485, EBI-89895;',
            'Q9VYI0:fne; NbExp=1; IntAct=EBI-123485, EBI-126770;']
        wanted = [
            ('Self',
                {'NbExp': '1', 'IntAct': ['EBI-123485', 'EBI-123485']}),
            ('Q9W158:CG4612',
                {'NbExp': '1', 'IntAct': ['EBI-123485', 'EBI-89895']}),
            ('Q9VYI0:fne',
                {'NbExp': '1', 'IntAct': ['EBI-123485', 'EBI-126770']})]
        # A single block is parsed; both sides are wrapped in a one-item list.
        observed = [cc_interaction_parser(block)
                    for block in [interaction_lines]]
        self.assertEqual(observed, [wanted])
    def test_cc_biophysicochemical_properties_parser(self):
        """cc_biophysicochemical_properties_parser: known values"""
        #pprint(cc['BIOPHYSICOCHEMICAL PROPERTIES'])  #topic specific parser
        f = cc_biophysicochemical_properties_parser
        # Both outer lists were left unclosed in the file; each holds exactly
        # one case.
        valid_inputs = [
            ['Kinetic parameters:',
            '  KM=98 uM for ATP;',
            '  KM=688 uM for pyridoxal;',
            '  Vmax=1.604 mmol/min/mg enzyme;',
            'pH dependence:',
            '  Optimum pH is 6.0. Active pH 4.5 to 10.5;',],
            ]
        expects = [
            {'Kinetic parameters': {
                'KM': ['98 uM for ATP', '688 uM for pyridoxal'],
                'Vmax': '1.604 mmol/min/mg enzyme'},
            'pH dependence': 'Optimum pH is 6.0. Active pH 4.5 to 10.5'},
            ]
        observed = [f(block) for block in valid_inputs]
        self.assertEqual(observed, expects)
    def test_cc_alternative_products_parser(self):
        """cc_alternative_products_parser: know values"""
        f = cc_alternative_products_parser
        # A single input block: two Event sections followed by Name entries.
        # NOTE(review): the first string has no trailing comma, so Python
        # concatenates it with the '  Comment=Free text;' literal below into
        # one list element -- confirm whether that is intended.
        valid_inputs = [
            ['Event=Alternative initiation;'
             '  Comment=Free text;',
            'Event=Alternative splicing; Named isoforms=3;',
            '  Comment=Additional isoforms seem to exist.',
            '  confirmation;',
            'Name=1; Synonyms=AIRE-1;',
            '  IsoId=O43918-1; Sequence=Displayed;',
            'Name=3; Synonyms=AIRE-3,',
            'ai-2, ai-3;',  #broken the hanging_paragraph_finder
            '  IsoId=O43918-3; Sequence=VSP_004089, VSP_004090;',]]
        # Expected: one dict per Event; the Name entries are nested under the
        # 'Names' key of the second event.
        expects = \
            [[{'Comment': 'Free text', 'Event': 'Alternative initiation'},
              {'Comment': 'Additional isoforms seem to exist. confirmation',
               'Event': 'Alternative splicing',
               'Named isoforms': '3',
               'Names': [{'IsoId': 'O43918-1',
                          'Name': '1',
                          'Sequence': 'Displayed',
                          'Synonyms': 'AIRE-1'},
                         {'IsoId': 'O43918-3',
                          'Name': '3',
                          'Sequence': ['VSP_004089', 'VSP_004090'],
                          'Synonyms': ['AIRE-3', 'ai-2', 'ai-3']}]}]]
        self.assertEqual(map(f, valid_inputs), expects)
    def test_cc_parser(self):
        """cc_parser: known values and raise when strict"""
        cc = cc_parser(cc_lines)
        #print cc.keys()
        #test Disease topic (default_handler)
        # NOTE(review): the expected list below is never closed and its string
        # fragments break off mid-sentence, so lines appear to be missing from
        # this assertion; restore it from version control.
        self.assertEqual(cc['DISEASE'], [
            'Defects in PHKA1 are linked to X-linked muscle glycogenosis '
            'Defects in ABCD1 are the cause of recessive X-linked '
                'adrenoleukodystrophy (X-ALD) [MIM:300100]. X-ALD is a rare '
        #test License (default_handler)
        self.assertEqual(cc['LICENSE'], [
            'This SWISS-PROT entry is copyright. It is produced through a '
                'collaboration removed'])
        #pprint(cc['DATABASE'])  #join_split_dict
        self.assertEqual(cc['DATABASE'], [{
            'NAME': 'CD40Lbase',
            'NOTE': 'European CD40L defect database (mutation db)',
            'WWW': '"http://www.expasy.org/cd40lbase/"'}])
        #test strict
        # A topic name not present elsewhere in cc_lines is prepended; strict
        # parsing is expected to reject it.
        cc_lines_with_unknown_topic = ['CC   -!- BLAHBLAH: xxxxx'] + cc_lines
        self.assertRaises(FieldError, cc_parser,
                cc_lines_with_unknown_topic, strict=True)
class ReferenceTests(TestCase):
    """Tests for parsers related to reference blocks"""
    def test_ref_finder(self):
        """ref_finder: should return a list of ref blocks"""
        # NOTE(review): nothing follows the docstring, so ref_finder is not
        # actually exercised by this test.
    def test_refs_parser(self):
        """refs_parser: should return a dict of {RN: ref_dict}"""
        # NOTE(review): nothing follows the docstring, so refs_parser is not
        # actually exercised by this test.
    def test_single_ref_parser(self):
        """single_ref_parser: should return the expected dict"""
        fake_ref_block = ['RN   [1]',
            'RP   NUCLEOTIDE',
            'RC   STRAIN=Bristol N2;',
            'RX   PubMed=1113;',
            'RA   Zhang L., Wu S.-L., Rubin C.S.;',
            'RT   "A novel ";',
            'RL   J. Biol. Chem. 276:10.',]
        rn, others = single_ref_parser(fake_ref_block)
        self.assertEqual(rn, 1)
        self.assertEqual(len(others), 6)
        #test strict: lacking required labels
        # Without the final RL line, non-strict parsing still returns the
        # remaining five fields.  The opening of this call was missing.
        self.assertEqual(len(single_ref_parser(
            fake_ref_block[:-1], strict=False)[1]), 5)
        # NOTE(review): the tail of this call was missing; strict=True is
        # inferred from the "test strict" comment above.
        self.assertRaises(RecordError, single_ref_parser, fake_ref_block[:-1],
            strict=True)
    def test_ra_parser(self):
        """ra_parser: two RA lines give one flat list of author names"""
        ra_lines = [
            "RA   Galinier A., Bleicher F., Negre D.,",
            "RA   Cozzone A.J., Cortay J.-C.;"]
        authors = [
            'Galinier A.', 'Bleicher F.', 'Negre D.',
            'Cozzone A.J.', 'Cortay J.-C.']
        self.assertEqual(ra_parser(ra_lines), authors)
    def test_rx_parser(self):
        """rx_parser should return expected dict"""
        # NOTE(review): the continuation of the second input was missing from
        # the file.  The DOI text is taken from the second expected dict; the
        # exact original punctuation (e.g. a trailing ';') is not known.
        inputs = [
            ['RX   MEDLINE=22709107; PubMed=12788972; DOI=10.1073/pnas.113'],
            ['RX   PubMed=14577811; '\
            'DOI=10.1597/1545-1569(2003)040<0632:AMMITS>2.0.CO;2']]
        expects = [
            {'DOI': '10.1073/pnas.113', 'MEDLINE': '22709107',
                'PubMed': '12788972'},
            {'DOI': '10.1597/1545-1569(2003)040<0632:AMMITS>2.0.CO;2',
                'PubMed': '14577811'}]
        observed = [rx_parser(lines) for lines in inputs]
        self.assertEqual(observed, expects)
    def test_rc_parser(self):
        """rc_parser should return expected dict"""
        lines = [
            "RC   PLASMID=R1 (R7268); TRANSPOSON=Tn3;",
            "RC   STRAIN=AL.012, AZ.026;"]
        # The opening of this assertion was missing; the parser under test is
        # the one named by this method.
        self.assertEqual(rc_parser(lines),
             {'TRANSPOSON': 'Tn3', 'PLASMID': 'R1 (R7268)',
             'STRAIN': ['AL.012', 'AZ.026']})
    def test_rt_parser(self):
        """rt_parser: a two-line quoted title gives one plain string"""
        title_lines = [
            'RT   "New insulin-like proteins',
            'RT   analysis and homology modeling.";']
        wanted = 'New insulin-like proteins analysis and homology modeling'
        self.assertEqual(rt_parser(title_lines), wanted)
    def test_rl_parser(self):
        """rl_parser: a known RL line gives the known str"""
        wanted = 'J. Mol. Biol. 168:321-331(1983)'
        observed = rl_parser(["RL   J. Mol. Biol. 168:321-331(1983)."])
        self.assertEqual(observed, wanted)
    def test_rn_parser(self):
        """rn_parser: 'RN   [8]' gives the int 8"""
        observed = rn_parser(["RN   [8]"])
        self.assertEqual(observed, 8)
    def test_rg_parser(self):
        """rg_parser: a known RG line gives a one-item list"""
        wanted = ['The mouse genome sequencing consortium']
        observed = rg_parser(["RG   The mouse genome sequencing consortium;"])
        self.assertEqual(observed, wanted)
    def test_rp_parser(self):
        """rp_parser: a known RP line gives the known str"""
        wanted = 'X-RAY CRYSTALLOGRAPHY (1.8 ANGSTROMS)'
        observed = rp_parser(["RP   X-RAY CRYSTALLOGRAPHY (1.8 ANGSTROMS)."])
        self.assertEqual(observed, wanted)
# global test data
# ft_lines: raw FT lines passed to ft_parser by FT_Tests.test_ft_parser.
ft_lines = \
"""FT   CHAIN        29    262       Granzyme A.
FT                                /FTId=PRO_0000027394.
FT   ACT_SITE     69     69       Charge relay system.
FT   VARIANT     121    121       T -> M (in dbSNP:3104233).
FT                                /FTId=VAR_024291.
FT   VARIANT       1      7       unknown  (in a skin tumor).
FT                                /FTId=VAR_005851.
FT   CONFLICT    282    282       R -> Q (in Ref. 18).
FT   STRAND       30     30
FT   STRAND       33     34
FT   TURN         37     38""".splitlines()
# ft_expect: the value ft_parser(ft_lines) is asserted to equal.
ft_expect = \
{'ACT_SITE': [{'Start': 69, 'End': 69, 'Description': 'Charge relay system'}],
 'CHAIN': [{'Description': {'Description': 'Granzyme A',
                            'Id': 'PRO_0000027394'},
            'End': 262,
            'Start': 29}],
 'CONFLICT': [{'Description': {'Comment': 'in Ref. 18',
                               'MutateFrom': 'R',
                               'MutateTo': 'Q'},
               'End': 282,
               'Start': 282}],
 'SecondaryStructure': [('STRAND', 30, 30),
                        ('STRAND', 33, 34),
                        ('TURN', 37, 38)],
 'VARIANT': [{'Description': {'Comment': 'in dbSNP:3104233',
                              'Id': 'VAR_024291',
                              'MutateFrom': 'T',
                              'MutateTo': 'M'},
              'End': 121,
              'Start': 121},
             {'Description': {'Comment': 'in a skin tumor',
                              'Id': 'VAR_005851',
                              'MutateFrom': 'unknown  ',
                              'MutateTo': ''},
              'End': 7,
              'Start': 1}]}
# dr_lines: raw DR lines passed to dr_parser by
# RootParsersKnownValues.test_dr_parser.
dr_lines =\
"""DR   MIM; 140050; gene.
DR   GO; GO:0001772; C:immunological synapse; TAS.
DR   GO; GO:0005634; C:nucleus; TAS.
DR   GO; GO:0006915; P:apoptosis; TAS.
DR   GO; GO:0006922; P:cleavage of lamin; IDA.
DR   GO; GO:0006955; P:immune response; TAS.
DR   InterPro; IPR001254; Peptidase_S1_S6.
DR   InterPro; IPR001314; Peptidase_S1A.
DR   Pfam; PF00089; Trypsin; 1.""".splitlines()
# dr_expect: the value dr_parser(dr_lines) is asserted to equal -- one key per
# database name, each holding a list of the remaining fields of every line.
dr_expect =\
    {'GO': [['GO:0001772', 'C:immunological synapse', 'TAS'],
        ['GO:0005634', 'C:nucleus', 'TAS'],
        ['GO:0006915', 'P:apoptosis', 'TAS'],
        ['GO:0006922', 'P:cleavage of lamin', 'IDA'],
        ['GO:0006955', 'P:immune response', 'TAS']],
    'Pfam': [['PF00089', 'Trypsin', '1']],
    'InterPro': [['IPR001254', 'Peptidase_S1_S6'],
        ['IPR001314', 'Peptidase_S1A']],
    'MIM': [['140050', 'gene']]}
# cc_lines: raw CC lines used by CC_Tests; fake_records_valid: raw records
# used by the MinimalEbiParser/EbiParser tests.
# NOTE(review): neither triple-quoted literal below has its own closing quotes
# (the tests index both names as lists, so a terminator that splits the text
# into lines is presumably missing), and some content lines appear to be
# missing as well -- e.g. no '//' record terminator is present in
# fake_records_valid.  Restore both fixtures from version control.
cc_lines = \
"""CC   -!- ALLERGEN: Causes an allergic reaction in human. Binds to IgE.
CC       bovine dander.
CC       Event=Alternative splicing; Named isoforms=3;
CC         Comment=Additional isoforms seem to exist.
CC         confirmation;
CC       Name=1; Synonyms=AIRE-1;
CC         IsoId=O43918-1; Sequence=Displayed;
CC       Name=2; Synonyms=AIRE-2;
CC         IsoId=O43918-2; Sequence=VSP_004089;
CC       Name=3; Synonyms=AIRE-3;
CC         IsoId=O43918-3; Sequence=VSP_004089, VSP_004090;
CC       Kinetic parameters:
CC         KM=98 uM for ATP;
CC         KM=688 uM for pyridoxal;
CC         Vmax=1.604 mmol/min/mg enzyme;
CC       pH dependence:
CC         Optimum pH is 6.0. Active pH 4.5 to 10.5;
CC   -!- DATABASE: NAME=CD40Lbase;
CC       NOTE=European CD40L defect database (mutation db);
CC       WWW="http://www.expasy.org/cd40lbase/".
CC   -!- DISEASE: Defects in PHKA1 are linked to X-linked muscle
CC       glycogenosis [MIM:311870]. 
CC   -!- DISEASE: Defects in ABCD1 are the cause of recessive X-linked
CC       adrenoleukodystrophy (X-ALD) [MIM:300100]. X-ALD is a rare
CC       phenotype.
CC       Self; NbExp=1; IntAct=EBI-123485, EBI-123485;
CC       Q9W158:CG4612; NbExp=1; IntAct=EBI-123485, EBI-89895;
CC       Q9VYI0:fne; NbExp=1; IntAct=EBI-123485, EBI-126770;
CC       NOTE=Ref.2.
CC   --------------------------------------------------------------------------
CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
CC   removed.
CC   --------------------------------------------------------------------------
fake_records_valid = """ID   CYC_BOVIN      STANDARD;      PRT;   104 AA.
AC   ac1; ac2;
DT   dt.
DE   de.
OS   os.
OC   oc.
OX   NCBI_TaxID=9606;
RN   [1]
SQ   SEQUENCE   486 AA;  55639 MW;  D7862E867AD74383 CRC64
     aac cpp
     ggh hh
ID   idid  std; prt; 104    #-5
OX   NCBI_TaxID=9606;
DE   dede.
# Run tests if called from the command line
if __name__ == '__main__':
    # main is the test runner imported from cogent.util.unit_test above.
    main()