Did I find the right examples for you? yes no      Crawl my project      Python Jobs

All Samples(7)  |  Call(4)  |  Derive(0)  |  Import(3)
It guesses whether the sequences in a file are nucleotide or protein

        def guess_seq_type(fhand):
            '''It guesses if the file is nucleotide or protein.

            Only a leading chunk of the file (requested size 1024, obtained
            with peek_chunk_from_file) is inspected, so the guess is based on
            the first sequences only.

            fhand -- file handle of the sequence file to inspect.

            It returns 'prot' as soon as a letter that can only belong to a
            protein is found, or 'nucl' when more than 80% of the letters seen
            are one of gcatnuGCATNU.

            It raises UnknownFormatError if the file is empty and RuntimeError
            if the type can not be decided (that includes the case in which no
            sequence letters could be read from the chunk).
            '''
            rna = set(ambiguous_rna_letters)
            dna = set(ambiguous_dna_letters)
            rna_dna = rna.union(dna)

            protein = set(extended_protein_letters)
            # Letters valid for proteins but not for any nucleotide alphabet.
            # A frozenset gives constant-time membership tests in the loop.
            only_prot = frozenset(protein.difference(rna_dna))

            nucl_letters = 'gcatnuGCATNU'

            chunk_size = 1024
            # NOTE(review): presumably peeking leaves the file position
            # untouched -- confirm against peek_chunk_from_file.
            chunk = peek_chunk_from_file(fhand, chunk_size)
            if not chunk:
                raise UnknownFormatError('The file is empty')
            fhand_ = cStringIO.StringIO(chunk)
            total_letters = 0
            nucleotides = 0
            for seq in read_seqs([fhand_]):
                for letter in get_str_seq(seq):
                    total_letters += 1
                    if letter in nucl_letters:
                        nucleotides += 1
                    if letter in only_prot:
                        return 'prot'

            # Without letters there is nothing to base the guess on; fail with
            # the regular error instead of a ZeroDivisionError.
            if not total_letters:
                raise RuntimeError('unable to guess the seq type')

            # float() forces a true division even if the module does not import
            # division from __future__ (plain Python 2 would truncate to 0 or 1).
            nucl_freq = float(nucleotides) / total_letters
            if nucl_freq > 0.8:
                return 'nucl'

            raise RuntimeError('unable to guess the seq type')
        


src/s/e/seq_crumbs-HEAD/crumbs/blast.py   seq_crumbs(Download)
 
from crumbs.utils.optional_modules import NCBIWWW
from crumbs.seqio import seqio, guess_seq_type, write_seqs
from crumbs.utils.bin_utils import (check_process_finishes, popen,
                                    get_binary_path)
                  copy_if_same_format=False)
        if dbtype is None:
            dbtype = guess_seq_type(open(dbpath))
        _makeblastdb_plus(dbpath, dbtype)
    return dbpath

src/s/e/seq_crumbs-HEAD/test/test_seqio.py   seq_crumbs(Download)
from crumbs.utils.test_utils import TEST_DATA_DIR
from crumbs.utils.bin_utils import BIN_DIR
from crumbs.seqio import (guess_seq_type, fastaqual_to_fasta, seqio,
                          _write_seqrecords, _read_seqrecords,
                          _itemize_fastx, read_seqs, write_seqs)
    def test_guess_seq_type(self):
        'It guesses if the sequence is nucleotide or protein'
        # Both test files are expected to be guessed as nucleotide.
        for fname in ('arabidopsis_genes', 'pairend2.sfastq'):
            fpath = os.path.join(TEST_DATA_DIR, fname)
            # The with block closes the handle even when the assert fails.
            with open(fpath) as fhand:
                assert guess_seq_type(fhand) == 'nucl'

src/s/e/seq_crumbs-HEAD/test/test_utils.py   seq_crumbs(Download)
from crumbs.utils.test_utils import TEST_DATA_DIR
from crumbs.utils.tags import ERROR_ENVIRON_VARIABLE
from crumbs.seqio import guess_seq_type
from crumbs.settings import get_setting
from crumbs.utils.file_formats import get_format, FILEFORMAT_INVENTORY,\
    def test_guess_seq_type():
        'It checks that we can guess the type of the seqs in a file'
        fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        # The returned type is not checked here; the call just has to succeed.
        # The with block makes sure the handle is closed afterwards.
        with open(fpath) as fhand:
            guess_seq_type(fhand)