#!/usr/bin/env python
"""
Provides tests for StockholmParser and related classes and functions.
"""
 
from cogent.parse.stockholm import is_gf_line, is_gc_line, is_gs_line, \
    is_gr_line, is_seq_line, is_structure_line, GfToInfo, GcToInfo, GsToInfo, \
    GrToInfo, MinimalStockholmParser, StockholmFinder, \
    StockholmParser, Sequence, is_empty_or_html
from cogent.util.unit_test import TestCase, main
from cogent.parse.record import RecordError
from cogent.core.info import Info
from cogent.struct.rna2d import WussStructure
from cogent.core.alignment import Alignment
from cogent.core.moltype import BYTES
 
# Module metadata.
__author__ = "Jeremy Widmann"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Jeremy Widmann"]
__license__ = "GPL"
__version__ = "1.5.3-dev"
__maintainer__ = "Jeremy Widmann"
__email__ = "jeremy.widmann@colorado.edu"
__status__ = "Development"

# Rebinds the name ``Sequence`` (imported from cogent.parse.stockholm above)
# to BYTES.Sequence.
# NOTE(review): nothing in this file appears to use ``Sequence`` after this
# point -- confirm whether the rebinding is still needed.
Sequence = BYTES.Sequence
 
class StockholmParserTests(TestCase):
    """Tests components of the stockholm parser, in the stockholm.py file.

    The fixture text is defined as module-level strings further down in this
    file; setUp splits it into the lists of lines that the tests pass to the
    parser functions.
    """

    def setUp(self):
        """Construct some fake data for testing purposes.

        The annotation fixtures are stored as lists of whitespace-stripped
        lines; the record fixtures are stored as lists of unmodified lines.
        """
        def stripped_lines(text):
            # The annotation fixtures are passed to the G*ToInfo converters
            # with leading/trailing whitespace removed from every line.
            return [line.strip() for line in text.split('\n')]

        self._fake_headers = stripped_lines(fake_headers)
        self._fake_gc_annotation = stripped_lines(fake_gc_annotation)
        self._fake_gs_annotation = stripped_lines(fake_gs_annotation)
        self._fake_gr_annotation = stripped_lines(fake_gr_annotation)

        self._fake_record_no_headers = fake_record_no_headers.split('\n')
        self._fake_record_no_sequences = fake_record_no_sequences.split('\n')
        self._fake_record_no_structure = fake_record_no_structure.split('\n')
        self._fake_two_records = fake_two_records.split('\n')
        self._fake_record = fake_record.split('\n')
        self._fake_record_bad_header_1 = fake_record_bad_header_1.split('\n')
        self._fake_record_bad_header_2 = fake_record_bad_header_2.split('\n')
        self._fake_record_bad_sequence_1 = \
            fake_record_bad_sequence_1.split('\n')
        self._fake_record_bad_structure_1 = \
            fake_record_bad_structure_1.split('\n')
        self._fake_record_bad_structure_2 = \
            fake_record_bad_structure_2.split('\n')

        self.single_family = single_family.split('\n')

    def test_is_empty_or_html(self):
        """is_empty_or_html: should ignore empty and HTML line"""
        line = '        '
        self.assertEqual(is_empty_or_html(line), True)
        line = '\n\n'
        self.assertEqual(is_empty_or_html(line), True)
        line = '<pre>'
        self.assertEqual(is_empty_or_html(line), True)
        line = '</pre>\n\n'
        self.assertEqual(is_empty_or_html(line), True)
        line = '\t<//\n'
        self.assertEqual(is_empty_or_html(line), False)

    def test_is_gf_line(self):
        """is_gf_line: functions correctly w/ various lines """
        self.assertEqual(is_gf_line('#=GF'), True)
        self.assertEqual(is_gf_line('#=GF AC   RF00001'), True)
        self.assertEqual(is_gf_line('#=GF CC   until it is\
            required for transcription. '), True)

        self.assertEqual(is_gf_line(''), False)
        self.assertEqual(is_gf_line('X07545.1/505-619 '), False)
        self.assertEqual(is_gf_line('#=G'), False)
        self.assertEqual(is_gf_line('=GF'), False)
        self.assertEqual(is_gf_line('#=GC SS_cons'), False)

    def test_is_gc_line(self):
        """is_gc_line: functions correctly w/ various lines """
        self.assertEqual(is_gc_line('#=GC'), True)
        self.assertEqual(is_gc_line('#=GC SS_cons'), True)
        self.assertEqual(is_gc_line('#=GC RF'), True)

        self.assertEqual(is_gc_line(''), False)
        self.assertEqual(is_gc_line('X07545.1/505-619 '), False)
        self.assertEqual(is_gc_line('#=G'), False)
        self.assertEqual(is_gc_line('=GF'), False)
        self.assertEqual(is_gc_line('#=GR SS'), False)

    def test_is_gs_line(self):
        """is_gs_line: functions correctly w/ various lines """
        self.assertEqual(is_gs_line('#=GS'), True)
        self.assertEqual(is_gs_line('#=GS Seq1   AC'), True)
        self.assertEqual(is_gs_line('#=GS Seq1   DE'), True)

        self.assertEqual(is_gs_line(''), False)
        self.assertEqual(is_gs_line('X07545.1/505-619 '), False)
        self.assertEqual(is_gs_line('#=G'), False)
        self.assertEqual(is_gs_line('=GF'), False)
        self.assertEqual(is_gs_line('#=GC SS_cons'), False)

    def test_is_gr_line(self):
        """is_gr_line: functions correctly w/ various lines """
        self.assertEqual(is_gr_line('#=GR'), True)
        self.assertEqual(is_gr_line('#=GR SS   ..<<..>>..'), True)
        self.assertEqual(is_gr_line('#=GR RF   cGGacG'), True)

        self.assertEqual(is_gr_line(''), False)
        self.assertEqual(is_gr_line('X07545.1/505-619 '), False)
        self.assertEqual(is_gr_line('#=G'), False)
        self.assertEqual(is_gr_line('=GF'), False)
        self.assertEqual(is_gr_line('#=GC SS_cons'), False)

    def test_is_seq_line(self):
        """is_seq_line: functions correctly w/ various lines """
        s = 'X07545.1/505-619                     .\
            .ACCCGGC.CAUA...GUGGCCG.GGCAA.CAC.CCGG.U.C..UCGUU'
        # Pass the variable, not the literal 's', so that the realistic
        # label-plus-sequence line is what actually gets checked.
        self.assertTrue(is_seq_line(s))
        self.assertTrue(is_seq_line('X07545.1/505-619'))
        self.assertTrue(is_seq_line('M21086.1/8-123'))

        self.assertFalse(is_seq_line(''))
        self.assertFalse(is_seq_line('#GF='))
        self.assertFalse(is_seq_line('//blah'))

    def test_is_structure_line(self):
        """is_structure_line: functions correctly w/ various lines """
        s = '#=GC SS_cons\
            <<<<<<<<<........<<.<<<<.<...<.<...<<<<.<.<.......'
        self.assertEqual(is_structure_line(s), True)
        self.assertEqual(is_structure_line('#=GC SS_cons'), False)
        self.assertEqual(is_structure_line('#=GC SS_cons2'), False)
        self.assertEqual(is_structure_line('#=GC SS_cons '), True)

        self.assertEqual(is_structure_line(''), False)
        self.assertEqual(is_structure_line(' '), False)
        self.assertEqual(is_structure_line('#=GF AC   RF00001'), False)
        self.assertEqual(is_structure_line('X07545.1/505-619'), False)
        self.assertEqual(is_structure_line('=GC SS_cons'), False)
        self.assertEqual(is_structure_line('#=GC'), False)
        self.assertEqual(is_structure_line('#=GC RF'), False)

    def test_GfToInfo(self):
        """GfToInfo: correctly builds info object from header information"""
        info = GfToInfo(self._fake_headers)
        self.assertEqual(info['AccessionNumber'], 'RF00001')
        self.assertEqual(info['Identification'], '5S_rRNA')
        self.assertEqual(info['Comment'], 'This is a short comment')
        self.assertEqual(info['Author'], 'Griffiths-Jones SR')
        self.assertEqual(info['Sequences'], '606')
        self.assertEqual(info['DatabaseReference'],
            ['URL; http://oberon.fvms.ugent.be:8080/rRNA/ssu/index.html;',
             'URL; http://rdp.cme.msu.edu/html/;'])
        self.assertEqual(info['PK'], 'not real')

    def test_GfToInfo_invalid_data(self):
        """GfToInfo: correctly raises error when necessary """
        invalid_headers = [['#=GF ACRF00001'], ['#=GFACRF00001']]
        for h in invalid_headers:
            self.assertRaises(RecordError, GfToInfo, h)

    def test_GcToInfo(self):
        """GcToInfo: correctly builds info object from header information"""
        info = GcToInfo(self._fake_gc_annotation)
        self.assertEqual(info['ConsensusSecondaryStructure'],
            '..........<<<<<<<<<<.....>>>>>>>>>>..')
        self.assertEqual(info['ReferenceAnnotation'],
            'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

    def test_GcToInfo_invalid_data(self):
        """GcToInfo: correctly raises error when necessary """
        invalid_headers = [['#=GCSS_cons ..<<..>>..'], ['#=GCSAxxxxxxx']]
        for h in invalid_headers:
            self.assertRaises(RecordError, GcToInfo, h)

    def test_GsToInfo(self):
        """GsToInfo: correctly builds info object from header information"""
        info = GsToInfo(self._fake_gs_annotation)
        self.assertEqual(info['BasePair'],
            {'1N77_C': ['0 70 cWW CC', '1 69 cWW CC', '2 68 cWW CC',
                        '3 67 cWW CC']})

    def test_GsToInfo_invalid_data(self):
        """GsToInfo: correctly raises error when necessary """
        invalid_headers = [['#=GSBPS 0 10 cwW CC'], ['#=GSACRF00001']]
        for h in invalid_headers:
            self.assertRaises(RecordError, GsToInfo, h)

    def test_GrToInfo(self):
        """GrToInfo: correctly builds info object from header information"""
        info = GrToInfo(self._fake_gr_annotation)
        self.assertEqual(info['SecondaryStructure'],
            {'1N77_C': '..........<<<<<<<<<<.....>>>>>>>>>>..'})
        self.assertEqual(info['ReferenceAnnotation'],
            {'1N77_C': 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'})

    def test_GrToInfo_invalid_data(self):
        """GrToInfo: correctly raises error when necessary """
        invalid_headers = [['#=GRSS ..<<..>>..'], ['#=GRSAxxxxxx']]
        for h in invalid_headers:
            self.assertRaises(RecordError, GrToInfo, h)

    def test_MinimalStockholmParser_strict_missing_fields(self):
        """MinimalStockholmParser: toggle strict functions w/ missing fields"""
        # strict = True
        self.assertRaises(RecordError, list,
            MinimalStockholmParser(self._fake_record_no_sequences))

        # strict = False
        # no header shouldn't be a problem
        headers, aln, struct = list(MinimalStockholmParser(
            self._fake_record_no_headers, strict=False))[0]
        self.assertEqual((headers, aln.todict(), str(struct)),
            ({'GS': [], 'GF': [], 'GR': [],
              'GC': ['#=GC SS_cons                         ............>>>']},
             {'Z11765.1/1-89': 'GGUC'}, '............>>>'))
        # should get empty on missing sequence or missing structure
        self.assertEqual(list(MinimalStockholmParser(
            self._fake_record_no_sequences, strict=False)), [])

    def test_MinimalStockholmParser_strict_invalid_sequence(self):
        """MinimalStockholmParser: toggle strict functions w/ invalid seq
        """
        # strict = True
        self.assertRaises(RecordError, list,
            MinimalStockholmParser(self._fake_record_bad_sequence_1))

        # strict = False
        # you expect to get back as much information as possible, also
        # half records or sequences
        records = list(MinimalStockholmParser(
            self._fake_record_bad_sequence_1, strict=False))
        self.assertEqual(len(records[0][1].NamedSeqs), 3)

    def test_MinimalStockholmParser_strict_invalid_structure(self):
        """Parsers: toggle strict functions w/ unparsable structure line

        NOTE(review): this method used to be called
        test_StockholmParser_strict_invalid_structure, the same name as a
        method defined later in this class, so it was overwritten and never
        ran. It is renamed here so that it is collected; confirm that it
        passes against the current parser.
        """
        # strict = True
        self.assertRaises(RecordError, list,
            StockholmParser(self._fake_record_bad_structure_1))

        # strict = False
        records = list(MinimalStockholmParser(
            self._fake_record_bad_structure_1, strict=False))
        self.assertEqual(records[0][2], None)

    def test_MinimalStockholmParser_w_valid_data(self):
        """MinimalStockholmParser: integrity of output """

        # Some ugly constructions here, but this is what the output of
        # parsing fake_two_records should be
        headers = ['#=GF AC   RF00014', '#=GF AU   Mifsud W']
        sequences = {
            'U17136.1/898-984':
                ''.join(['AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA',
                         'AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU']),
            'M15749.1/155-239':
                ''.join(['AACGCAUCGGAUUUCCCGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUU',
                         'AGCAAGUUUGAUCCCGACUCCUG-CGAGUCGGGAUUU']),
            'AF090431.1/222-139':
                ''.join(['CUCACAUCAGAUUUCCUGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUA',
                         'AGCAAGUUUGAUCCCGACCCGU--AGGGCCGGGAUUU'])}

        structure = WussStructure(''.join(
            ['...<<<<<<<.....>>>>>>>....................<<<<<...',
             '.>>>>>....<<<<<<<<<<.....>>>>>>>>>>..']))

        data = list(MinimalStockholmParser(self._fake_two_records,
            strict=False))
        self.assertEqual(
            (data[0][0]['GF'], data[0][1].todict(), str(data[0][2])),
            (headers, sequences, structure))
        self.assertTrue(isinstance(data[0][1], Alignment))

        # This line tests that invalid entries are ignored when strict=False
        # Note, there are two records in self._fake_two_records, but 2nd is
        # invalid
        self.assertEqual(len(data), 1)

    def test_StockholmFinder(self):
        """StockholmFinder: integrity of output """
        fake_record = ['a', '//', 'b', 'b', '//']
        data = list(StockholmFinder(fake_record))
        self.assertEqual(len(data), 2)
        self.assertEqual(data[0], ['a', '//'])
        self.assertEqual(data[1], ['b', 'b', '//'])

    def test_StockholmParser(self):
        """StockholmParser: integrity of output """

        expected_sequences = [
            ''.join(['AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA',
                     'AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU']),
            ''.join(['AACGCAUCGGAUUUCCCGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUU',
                     'AGCAAGUUUGAUCCCGACUCCUG-CGAGUCGGGAUUU']),
            ''.join(['CUCACAUCAGAUUUCCUGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUA',
                     'AGCAAGUUUGAUCCCGACCCGU--AGGGCCGGGAUUU'])]
        expected_structure = ''.join(
            ['...<<<<<<<.....>>>>>>>....................<<<<<...',
             '.>>>>>....<<<<<<<<<<.....>>>>>>>>>>..'])

        records = list(StockholmParser(self._fake_record))
        # Without this check the loop below would pass vacuously if the
        # parser yielded nothing; the fixture holds exactly one record.
        self.assertEqual(len(records), 1)
        for r in records:
            headers = r.Info
            sequences = r
            structure = r.Info['Struct']
            self.assertEqual(headers['GF']['AccessionNumber'], 'RF00014')
            self.assertEqual(headers['GF']['Author'], 'Mifsud W')
            self.assertEqualItems(sequences.values(), expected_sequences)
            self.assertTrue(isinstance(sequences, Alignment))
            self.assertEqual(structure, expected_structure)
            self.assertTrue(isinstance(structure, WussStructure))

    def test_StockholmParser_strict_missing_fields(self):
        """StockholmParser: toggle strict functions correctly """
        # strict = True
        self.assertRaises(RecordError, list,
            StockholmParser(self._fake_record_no_headers))

        # strict = False
        self.assertEqual(list(StockholmParser(self._fake_record_no_headers,
            strict=False)), [])
        self.assertEqual(list(StockholmParser(self._fake_record_no_sequences,
            strict=False)), [])

    def test_StockholmParser_strict_invalid_headers(self):
        """StockholmParser: functions when toggling strict record w/ bad header
        """
        # strict = True
        self.assertRaises(RecordError, list,
            StockholmParser(self._fake_record_bad_header_1))

        self.assertRaises(RecordError, list,
            StockholmParser(self._fake_record_bad_header_2))

        # strict = False
        for bad_record in (self._fake_record_bad_header_1,
                           self._fake_record_bad_header_2):
            obs = list(StockholmParser(bad_record,
                strict=False))[0].Info.GF.keys()
            self.assertEqual(len(obs), 1)

    def test_StockholmParser_strict_invalid_sequences(self):
        """StockholmParser: functions when toggling strict w/ record w/ bad seq
        """
        # strict = True (exercised through MinimalStockholmParser)
        self.assertRaises(RecordError, list,
            MinimalStockholmParser(self._fake_record_bad_sequence_1))

        # strict = False
        # in 'False' mode you expect to get back as much as possible, also
        # parts of sequences
        records = list(StockholmParser(self._fake_record_bad_sequence_1,
            strict=False))
        self.assertEqual(len(records[0].NamedSeqs), 3)

    def test_StockholmParser_strict_invalid_structure(self):
        """StockholmParser: functions when toggling strict record w/ bad struct
        """
        # strict
        self.assertRaises(RecordError, list,
            StockholmParser(self._fake_record_bad_structure_2))
        # not strict
        self.assertEqual(list(StockholmParser(
            self._fake_record_bad_structure_2, strict=False)), [])

    def test_StockholmParser_single_family(self):
        """StockholmParser: should work on a family in stockholm format"""
        exp_header = {}
        exp_aln = {
            'K02120.1/628-682':
                'AUGGGAAAUUCCCCCUCCUAUAACCCCCCCGCUGGUAUCUCCCCCUCAGACUGGC',
            'D00647.1/629-683':
                'AUGGGAAACUCCCCCUCCUAUAACCCCCCCGCUGGCAUCUCCCCCUCAGACUGGC'}
        exp_struct = '<<<<<<.........>>>>>>.........<<<<<<.............>>>>>>'
        aln = list(StockholmParser(self.single_family))[0]
        self.assertEqual(aln.Info['GF'], exp_header)
        self.assertEqual(aln, exp_aln)
        self.assertEqual(aln.Info['Struct'], exp_struct)
 
 
 
# This is an altered version of some header info from Rfam.seed, modified to
# incorporate different cases for testing (repeated DR lines, a CC comment
# split over two lines, and a PK tag).
fake_headers = """#=GF AC   RF00001
#=GF AU   Griffiths-Jones SR
#=GF ID   5S_rRNA
#=GF RT   5S Ribosomal RNA Database.
#=GF DR   URL; http://oberon.fvms.ugent.be:8080/rRNA/ssu/index.html;
#=GF DR   URL; http://rdp.cme.msu.edu/html/;
#=GF CC   This is a short
#=GF CC   comment
#=GF SQ   606
#=GF PK   not real"""

# #=GC lines (SS_cons and RF) used by test_GcToInfo.
fake_gc_annotation = """#=GC SS_cons    ..........<<<<<<<<<<.....>>>>>>>>>>..
#=GC RF    xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
"""

# #=GS lines: four BP entries for the sequence 1N77_C, used by test_GsToInfo.
fake_gs_annotation = """#=GS 1N77_C BP 0 70 cWW CC
#=GS 1N77_C BP 1 69 cWW CC
#=GS 1N77_C BP 2 68 cWW CC
#=GS 1N77_C BP 3 67 cWW CC
"""

# #=GR lines (SS and RF) for the sequence 1N77_C, used by test_GrToInfo.
fake_gr_annotation = """#=GR 1N77_C SS    ..........<<<<<<<<<<.....>>>>>>>>>>..
#=GR 1N77_C RF    xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
"""
 
# Record with one sequence line and an SS_cons line but no #=GF lines.
fake_record_no_headers ="""Z11765.1/1-89                        GGUC
#=GC SS_cons                         ............>>>
//"""

# Record with a #=GF line and an SS_cons line but no sequence lines.
fake_record_no_sequences ="""#=GF AC   RF00006
#=GC SS_cons                         ............>
//"""

# Record with a #=GF line and one sequence line but no SS_cons line.
fake_record_no_structure ="""#=GF AC   RF00006
 
Z11765.1/1-89                        GGUCAGC
//"""
 
# Two '//'-terminated records: a complete one (three sequences given in two
# blocks, with SS_cons and RF lines) followed by one that holds only a #=GF
# line.
fake_two_records ="""# STOCKHOLM 1.0
 
#=GF AC   RF00014
#=GF AU   Mifsud W
 
U17136.1/898-984               AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA
M15749.1/155-239               AACGCAUCGGAUUUCCCGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUU
AF090431.1/222-139             CUCACAUCAGAUUUCCUGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUA
#=GC SS_cons                   ...<<<<<<<.....>>>>>>>....................<<<<<...
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 
U17136.1/898-984               AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU
M15749.1/155-239               AGCAAGUUUGAUCCCGACUCCUG.CGAGUCGGGAUUU
AF090431.1/222-139             AGCAAGUUUGAUCCCGACCCGU..AGGGCCGGGAUUU
#=GC SS_cons                   .>>>>>....<<<<<<<<<<.....>>>>>>>>>>..
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//
#=GF AC   RF00015
//"""

# A single complete record; same content as the first record of
# fake_two_records.
fake_record ="""# STOCKHOLM 1.0
 
#=GF AC   RF00014
#=GF AU   Mifsud W
 
U17136.1/898-984               AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA
M15749.1/155-239               AACGCAUCGGAUUUCCCGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUU
AF090431.1/222-139             CUCACAUCAGAUUUCCUGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUA
#=GC SS_cons                   ...<<<<<<<.....>>>>>>>....................<<<<<...
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 
U17136.1/898-984               AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU
M15749.1/155-239               AGCAAGUUUGAUCCCGACUCCUG.CGAGUCGGGAUUU
AF090431.1/222-139             AGCAAGUUUGAUCCCGACCCGU..AGGGCCGGGAUUU
#=GC SS_cons                   .>>>>>....<<<<<<<<<<.....>>>>>>>>>>..
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//"""
 
# Same as fake_record, except that the AU header has no whitespace between
# the tag and its value ('#=GF AUMifsudW').
fake_record_bad_header_1 ="""# STOCKHOLM 1.0
 
#=GF AC   RF00014
#=GF AUMifsudW
 
U17136.1/898-984               AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA
M15749.1/155-239               AACGCAUCGGAUUUCCCGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUU
AF090431.1/222-139             CUCACAUCAGAUUUCCUGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUA
#=GC SS_cons                   ...<<<<<<<.....>>>>>>>....................<<<<<...
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 
U17136.1/898-984               AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU
M15749.1/155-239               AGCAAGUUUGAUCCCGACUCCUG.CGAGUCGGGAUUU
AF090431.1/222-139             AGCAAGUUUGAUCCCGACCCGU..AGGGCCGGGAUUU
#=GC SS_cons                   .>>>>>....<<<<<<<<<<.....>>>>>>>>>>..
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//"""

# Same as fake_record, except that the AU header has no whitespace after
# '#=GF' ('#=GFAUMifsud W').
fake_record_bad_header_2 ="""# STOCKHOLM 1.0
 
#=GF AC   RF00014
#=GFAUMifsud W
 
U17136.1/898-984               AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA
M15749.1/155-239               AACGCAUCGGAUUUCCCGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUU
AF090431.1/222-139             CUCACAUCAGAUUUCCUGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUA
#=GC SS_cons                   ...<<<<<<<.....>>>>>>>....................<<<<<...
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 
U17136.1/898-984               AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU
M15749.1/155-239               AGCAAGUUUGAUCCCGACUCCUG.CGAGUCGGGAUUU
AF090431.1/222-139             AGCAAGUUUGAUCCCGACCCGU..AGGGCCGGGAUUU
#=GC SS_cons                   .>>>>>....<<<<<<<<<<.....>>>>>>>>>>..
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//"""
 
# Same as fake_record, except that the first sequence line has no whitespace
# between the label and the sequence.
fake_record_bad_sequence_1 ="""# STOCKHOLM 1.0
 
#=GF AC   RF00014
#=GF AU   Mifsud W
 
U17136.1/898-984AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA
M15749.1/155-239               AACGCAUCGGAUUUCCCGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUU
AF090431.1/222-139             CUCACAUCAGAUUUCCUGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUA
#=GC SS_cons                   ...<<<<<<<.....>>>>>>>....................<<<<<...
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 
U17136.1/898-984               AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU
M15749.1/155-239               AGCAAGUUUGAUCCCGACUCCUG.CGAGUCGGGAUUU
AF090431.1/222-139             AGCAAGUUUGAUCCCGACCCGU..AGGGCCGGGAUUU
#=GC SS_cons                   .>>>>>....<<<<<<<<<<.....>>>>>>>>>>..
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//"""

# Same as fake_record, except that the first SS_cons line has no whitespace
# between the tag and the structure string.
fake_record_bad_structure_1 ="""# STOCKHOLM 1.0
 
#=GF AC   RF00014
#=GF AU   Mifsud W
 
U17136.1/898-984               AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA
M15749.1/155-239               AACGCAUCGGAUUUCCCGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUU
AF090431.1/222-139             CUCACAUCAGAUUUCCUGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUA
#=GC SS_cons...<<<<<<<.....>>>>>>>....................<<<<<...
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 
U17136.1/898-984               AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU
M15749.1/155-239               AGCAAGUUUGAUCCCGACUCCUG.CGAGUCGGGAUUU
AF090431.1/222-139             AGCAAGUUUGAUCCCGACCCGU..AGGGCCGGGAUUU
#=GC SS_cons                   .>>>>>....<<<<<<<<<<.....>>>>>>>>>>..
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//"""

# Same as fake_record, except that the first SS_cons line ends in '!!!'
# instead of '...'.
fake_record_bad_structure_2 ="""# STOCKHOLM 1.0
 
#=GF AC   RF00014
#=GF AU   Mifsud W
 
U17136.1/898-984               AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA
M15749.1/155-239               AACGCAUCGGAUUUCCCGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUU
AF090431.1/222-139             CUCACAUCAGAUUUCCUGGUGUAACGAA.UUUUCAAGUGCUUCUUGCAUA
#=GC SS_cons                   ...<<<<<<<.....>>>>>>>....................<<<<<!!!
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 
U17136.1/898-984               AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU
M15749.1/155-239               AGCAAGUUUGAUCCCGACUCCUG.CGAGUCGGGAUUU
AF090431.1/222-139             AGCAAGUUUGAUCCCGACCCGU..AGGGCCGGGAUUU
#=GC SS_cons                   .>>>>>....<<<<<<<<<<.....>>>>>>>>>>..
#=GC RF                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//"""
 
single_family=\
"""K02120.1/628-682      AUGGGAAAUUCCCCCUCCUAUAACCCCCCCGCUGGUAUCUCCCCCUCAGA
D00647.1/629-683      AUGGGAAACUCCCCCUCCUAUAACCCCCCCGCUGGCAUCUCCCCCUCAGA
#=GC SS_cons          <<<<<<.........>>>>>>.........<<<<<<.............>
 
K02120.1/628-682      CUGGC
D00647.1/629-683      CUGGC
#=GC SS_cons          >>>>>
//"""
 
# Run the tests when this file is executed as a script; ``main`` is the
# runner imported from cogent.util.unit_test at the top of the file.
if __name__ == '__main__':
    main()