#!/usr/bin/env python
"""Unit tests for the Greengenes record parsers in cogent.parse.greengenes.

The fixtures at the bottom of the module (``mock_data`` and ``real_data``)
hold one ``key=value`` field per line, wrapped in record start/end marker
lines.  The tests feed them to the parsers via ``str.splitlines()``, so the
line structure of those strings is significant.
"""

from cogent.util.unit_test import TestCase, main
from cogent.parse.greengenes import (MinimalGreengenesParser, make_ignore_f,
                                     DefaultDelimitedSplitter,
                                     SpecificGreengenesParser)

__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2007-2012, The Cogent Project" #consider project name
__credits__ = ["Daniel McDonald"] #remember to add yourself if you make changes
__license__ = "GPL"
__version__ = "1.5.3-dev"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald@colorado.edu"
__status__ = "Prototype"


class ParseGreengenesRecordsTests(TestCase):
    """Exercise the Greengenes parsers against mock and real-world records."""

    def setUp(self):
        """No per-test state is needed; fixtures are module-level strings."""
        pass

    def test_MinimalGreengenesParser_mock(self):
        """Test MinimalGreengenesParser against mock data"""
        # The mock fixture uses non-default record markers, so they are
        # passed explicitly.
        res = MinimalGreengenesParser(mock_data.splitlines(),
                                      RecStart="my_starting",
                                      RecEnd="my_ending")

        records = list(res)
        # Expect one dict per record; a field with nothing after '=' maps to
        # the empty string.
        exp = [{'a':'1','b':'2','c':'3','d':'','e':'5'},
               {'q':'asdasd','c':'taco'}]
        self.assertEqual(records, exp)

    def test_MinimalGreengenesParser_real(self):
        """Test MinimalGreengenesParser against real data"""
        res = MinimalGreengenesParser(real_data.splitlines())
        # The fixture contains exactly two records.
        record1, record2 = list(res)

        self.assertEqual(record1['G2_chip_tax_string'],'Unclassified')
        self.assertEqual(record1['authors'],'Hernanandez-Eugenio,G., Silva-Rojas,H.V., Zelaya-Molina,L.X.')
        self.assertEqual(record1['bel3_div_ratio'],'')
        # One entry per key=value line of the first record.
        self.assertEqual(len(record1), 72)

        self.assertEqual(record2['ncbi_acc_w_ver'],'FJ832719.1')
        self.assertEqual(record2['timestamp'],'2010-03-23 14:08:27')
        self.assertEqual(record2['title'],'Developmental Microbial Ecology of the Crop of the Folivorous Hoatzin')

    def test_SpecificGreengenesParser_real(self):
        """Test SpecificGreengenesParser against real data"""
        fields = ['prokMSA_id','journal']

        # Without an id list, every record yields a tuple of the requested
        # fields, in the order requested.
        res = SpecificGreengenesParser(real_data.splitlines(), fields)
        records = list(res)
        exp = [('604868',''),('604867','ISME J (2010) In press')]
        self.assertEqual(records, exp)

        # With an id list, only the record whose prokMSA_id is listed comes
        # back; the id that matches nothing is silently skipped.
        ids = ['604867','12312312323']
        res = SpecificGreengenesParser(real_data.splitlines(), fields, ids)
        records = list(res)
        exp = [('604867','ISME J (2010) In press')]
        self.assertEqual(records, exp)

    def test_make_ignore_f(self):
        """Properly ignore empty records and the start line"""
        f = make_ignore_f('testing')

        # Lines carrying data, or merely resembling the start marker, are kept.
        self.assertFalse(f(['asasdasd','']))
        self.assertFalse(f(['test','']))
        self.assertFalse(f(['testing2','']))
        self.assertFalse(f(['testing','asd']))

        # Empty records, None and the bare start marker are ignored.
        self.assertTrue(f(['','']))
        self.assertTrue(f(None))
        self.assertTrue(f(['testing','']))


mock_data = """my_starting
a=1
b=2
c=3
d=
e=5
my_ending
my_starting
q=asdasd
c=taco
my_ending
"""

real_data = """BEGIN
G2_chip_tax_string=Unclassified
G2_chip_tax_string_format_2=Unclassified
HOMD_tax_string=
HOMD_tax_string_format_2=
Hugenholtz_tax_string=Unclassified
Hugenholtz_tax_string_format_2=Unclassified
Ludwig_tax_string=Unclassified
Ludwig_tax_string_format_2=Unclassified
Pace_tax_string=Unclassified
Pace_tax_string_format_2=Unclassified
RDP_tax_string=Unclassified
RDP_tax_string_format_2=Unclassified
Silva_tax_string=Unclassified
Silva_tax_string_format_2=Unclassified
authors=Hernanandez-Eugenio,G., Silva-Rojas,H.V., Zelaya-Molina,L.X.
bel3_div_ratio=
bellerophon=
blast_perc_ident_to_template=
clone=51a
contact_info=Irrigacion, Universidad Autonoma Chapingo, Carretera Mexico-Texcoco Km 37.5, Texcoco, Mexico 56230, Mexico
core_set_member=
core_set_member2=
country=Mexico: Mexico City
create_date=21-NOV-2009
db_name=
decision=clone
description=Uncultured bacterium clone 51a 16S ribosomal RNA gene, partial sequence
email=
gold_id=
img_oid=
isolate=
isolation_source=mesophilic anaerobic reactor fed with effluent from the chemical industry
journal=
longest_insertion=
medline_ids=
ncbi_acc=
ncbi_acc_w_ver=FJ461956.1
ncbi_gi=213390944
ncbi_seq_length=1512
ncbi_tax_id=77133
ncbi_tax_string=Bacteria; environmental samples
ncbi_tax_string_format_2=Unclassified
non_ACGT_count=
non_ACGT_percent=
note=
organism=uncultured bacterium
perc_ident_to_invariant_core=
prokMSA_id=604868
prokMSAname=Microbial ecology industrial digestor mesophilic anaerobic reactor fed effluent chemical industry clone 51a
pubmed_ids=
remark=
replaced_by=
single_nt_runs_over_7=
small_gap_intrusions=
source=uncultured bacterium
span_aligned=1..2
specific_host=
status=0
strain=
study_id=38002
sub_species=
submit_date=24-OCT-2008
template=
timestamp=2010-03-23 14:08:27
title=Microbial ecology of industrial anaerobic digestor
unaligned_length=
update_date=21-NOV-2009
warning=
wigeon95=
wigeon99=
wigeon_std_dev=
aligned_seq=unaligned
END
BEGIN
G2_chip_tax_string=Unclassified
G2_chip_tax_string_format_2=Unclassified
HOMD_tax_string=
HOMD_tax_string_format_2=
Hugenholtz_tax_string=Unclassified
Hugenholtz_tax_string_format_2=Unclassified
Ludwig_tax_string=Unclassified
Ludwig_tax_string_format_2=Unclassified
Pace_tax_string=Unclassified
Pace_tax_string_format_2=Unclassified
RDP_tax_string=Unclassified
RDP_tax_string_format_2=Unclassified
Silva_tax_string=Unclassified
Silva_tax_string_format_2=Unclassified
authors=Brodie,E.L., Dominguez-Bello,M.G., Garcia-Amado,M.A., Godoy-Vitorino,F., Goldfarb,K.C., Michelangeli,F.
bel3_div_ratio=
bellerophon=
blast_perc_ident_to_template=
clone=J3Q101_11C02
contact_info=Biology, University of Puerto Rico, Rio Piedras Campus, PO Box 23360, San Juan, PR 00931-3360, USA
core_set_member=
core_set_member2=
country=Venezuela
create_date=10-DEC-2009
db_name=
decision=clone
description=Uncultured bacterium clone J3Q101_11C02 16S ribosomal RNA gene, partial sequence
email=
gold_id=
img_oid=
isolate=
isolation_source=crop contents
journal=ISME J (2010) In press
longest_insertion=
medline_ids=
ncbi_acc=
ncbi_acc_w_ver=FJ832719.1
ncbi_gi=226447371
ncbi_seq_length=1326
ncbi_tax_id=77133
ncbi_tax_string=Bacteria; environmental samples
ncbi_tax_string_format_2=Unclassified
non_ACGT_count=
non_ACGT_percent=
note=
organism=uncultured bacterium
perc_ident_to_invariant_core=
prokMSA_id=604867
prokMSAname=Microbial Ecology Crop Folivorous Hoatzin crop contents clone J3Q101_11C02
pubmed_ids=
remark=
replaced_by=
single_nt_runs_over_7=
small_gap_intrusions=
source=uncultured bacterium
span_aligned=1..2
specific_host=
status=0
strain=
study_id=37901
sub_species=
submit_date=16-MAR-2009
template=
timestamp=2010-03-23 14:08:27
title=Developmental Microbial Ecology of the Crop of the Folivorous Hoatzin
unaligned_length=
update_date=10-DEC-2009
warning=
wigeon95=
wigeon99=
wigeon_std_dev=
aligned_seq=unaligned
END
"""

if __name__ == '__main__':
    main()