Did I find the right examples for you? yes no      Crawl my project      Python Jobs

All Samples(35)  |  Call(35)  |  Derive(0)  |  Import(0)
Index several sequence files and return a dictionary like object.

The index is stored in an SQLite database rather than in memory (as in the
Bio.SeqIO.index(...) function).

 - index_filename - Where to store the SQLite index
 - filenames - list of strings specifying file(s) to be indexed, or when
              indexing a single file this can be given as a string.
              (optional if reloading an existing index, but must match)
 - format   - lower case string describing the file format(more...)

        def index_db(index_filename, filenames=None, format=None, alphabet=None,
                     key_function=None):
            """Index several sequence files and return a dictionary like object.

            The index is stored in an SQLite database rather than in memory (as in the
            Bio.SeqIO.index(...) function).

             - index_filename - Where to store the SQLite index
             - filenames - list of strings specifying file(s) to be indexed, or when
                          indexing a single file this can be given as a string.
                          (optional if reloading an existing index, but must match)
             - format   - lower case string describing the file format
                          (optional if reloading an existing index, but must match)
             - alphabet - optional Alphabet object, useful when the sequence type
                          cannot be automatically inferred from the file itself
                          (e.g. format="fasta" or "tab")
             - key_function - Optional callback function which when given a
                          SeqRecord identifier string should return a unique
                          key for the dictionary.

            This indexing function will return a dictionary like object, giving the
            SeqRecord objects as values:

            >>> from Bio.Alphabet import generic_protein
            >>> from Bio import SeqIO
            >>> files = ["GenBank/NC_000932.faa", "GenBank/NC_005816.faa"]
            >>> def get_gi(name):
            ...     parts = name.split("|")
            ...     i = parts.index("gi")
            ...     assert i != -1
            ...     return parts[i+1]
            >>> idx_name = ":memory:" #use an in memory SQLite DB for this test
            >>> records = SeqIO.index_db(idx_name, files, "fasta", generic_protein, get_gi)
            >>> len(records)
            95
            >>> records["7525076"].description
            'gi|7525076|ref|NP_051101.1| Ycf2 [Arabidopsis thaliana]'
            >>> records["45478717"].description
            'gi|45478717|ref|NP_995572.1| pesticin [Yersinia pestis biovar Microtus str. 91001]'

            In this example the two files contain 85 and 10 records respectively.

            BGZF compressed files are supported, and detected automatically. Ordinary
            GZIP compressed files are not supported.

            Raises TypeError if index_filename is not a string, if filenames is
            neither a list nor a single string, or if format is not a string.
            Raises ValueError if format is not lower case, or if alphabet is given
            but is not an Alphabet or AlphabetEncoder instance.

            See also: Bio.SeqIO.index() and Bio.SeqIO.to_dict()
            """
            #Try and give helpful error messages:
            if not isinstance(index_filename, basestring):
                raise TypeError("Need a string for the index filename")
            if isinstance(filenames, basestring):
                #Make the API a little more friendly, and more similar
                #to Bio.SeqIO.index(...) for indexing just one file.
                filenames = [filenames]
            if filenames is not None and not isinstance(filenames, list):
                raise TypeError(
                    "Need a list of filenames (as strings), or one filename")
            if format is not None and not isinstance(format, basestring):
                raise TypeError("Need a string for the file format (lower case)")
            if format and format != format.lower():
                raise ValueError("Format string '%s' should be lower case" % format)
            if alphabet is not None and not isinstance(alphabet,
                                                       (Alphabet, AlphabetEncoder)):
                #Use %r formatting instead of calling the repr() builtin, and do
                #not bind any local called "repr" in this function: a local of
                #that name would turn a repr(...) call here into an
                #UnboundLocalError instead of the intended ValueError.
                raise ValueError("Invalid alphabet, %r" % (alphabet,))

            #Map the file format to a sequence iterator:
            from ._index import _FormatToRandomAccess  # Lazy import
            from Bio.File import _SQLiteManySeqFilesDict
            #Text describing this call, handed on to the returned dictionary
            #like object as its final positional argument.
            repr_text = "SeqIO.index_db(%r, filenames=%r, format=%r, alphabet=%r, key_function=%r)" \
                        % (index_filename, filenames, format, alphabet, key_function)

            def proxy_factory(format, filename=None):
                """Given a filename returns proxy object, else boolean if format OK."""
                if filename:
                    return _FormatToRandomAccess[format](filename, format, alphabet)
                else:
                    return format in _FormatToRandomAccess

            return _SQLiteManySeqFilesDict(index_filename, filenames,
                                           proxy_factory, format,
                                           key_function, repr_text)
        


src/a/r/ARC-HEAD/ARC/app.py   ARC(Download)
                    print s['PE1']
                    p1 = SeqIO.index_db(
                        os.path.join(working_dir, "PE1.idx"),
                        s['PE1'],
                        format,
                    print s['PE2']
                    p2 = SeqIO.index_db(
                        os.path.join(working_dir, "PE2.idx"),
                        s['PE2'],
                        format,
                    print s['SE']
                    SeqIO.index_db(
                        os.path.join(working_dir, "SE.idx"),
                        s['SE'],
                        format,

src/p/a/paired_sequence_utils-0.1/paired_sequence_match.py   paired_sequence_utils(Download)
        print "Building index of reads in '%s'" % args[1]
    try:
        r_index = SeqIO.index_db(r_file_index, args[1], "fastq", key_function=split_id)
    except:
        sys.stderr.write("Problem during index creation, aborting\n")
        print "Building index of reads in '%s'" % args[0]
    try:
        f_index = SeqIO.index_db(f_file_index, args[0], "fastq",
                                 key_function=split_id)
    except:

src/a/r/ARC-HEAD/ARC/runners/mapper.py   ARC(Download)
        iteration = self.params['iteration']
        if 'PE1' in self.params and 'PE2' in self.params:
            idx_PE1 = SeqIO.index_db(os.path.join(self.params['working_dir'], "PE1.idx"), key_function=lambda x: x.split("/")[0])
            idx_PE2 = SeqIO.index_db(os.path.join(self.params['working_dir'], "PE2.idx"), key_function=lambda x: x.split("/")[0])
        if 'SE' in self.params:
            idx_SE = SeqIO.index_db(os.path.join(self.params['working_dir'], "SE.idx"), key_function=lambda x: x.split("/")[0])

src/p/i/picobio-HEAD/sambam/sam_seq_equals.py   picobio(Download)
try:
    import sqlite3
    reference = SeqIO.index_db(reference_filename+".idx", reference_filename, "fasta")
except ImportError:
    reference = SeqIO.index(reference_filename, "fasta")

src/p/i/picobio-HEAD/blooming_reads/re_pair_circular_sam.py   picobio(Download)
        if os.path.isfile(idx):
            sys.stderr.write("Loading %s\n" % idx)
            raw = SeqIO.index_db(idx)
        else:
            sys.stderr.write("Creating %s\n" % idx)
            raw= SeqIO.index_db(idx, raw_reads, "fastq")

src/b/i/biofrills-0.3.1/biofrills/sequtils.py   biofrills(Download)
            else:
                try:
                    index = SeqIO.index_db(refcache)
                except Exception:
                    logging.warn("Skipping corrupted cache; rebuilding index")
    if index is None:
        # Rebuild the index, for whatever reason
        index = SeqIO.index_db(refcache, [reffile], 'fasta')
 
    # Extract records by key

src/b/i/biopython-1.63/Tests/test_SeqIO_index.py   biopython(Download)
        def test_old(self):
            """Load existing index with no options."""
            d = SeqIO.index_db("Roche/triple_sff.idx")
            self.assertEqual(54, len(d))
 
        def test_old_format(self):
            """Load existing index with correct format."""
            d = SeqIO.index_db("Roche/triple_sff.idx", format="sff")
        def test_old_files(self):
            """Load existing index with correct files."""
            d = SeqIO.index_db("Roche/triple_sff.idx",
                               ["E3MFGYR02_no_manifest.sff", "greek.sff", "paired.sff"])
            self.assertEqual(54, len(d))
        #In memory,
        #note here give filenames as list of strings
        rec_dict = SeqIO.index_db(":memory:", [filename], format,
                                  alphabet)
        self.check_dict_methods(rec_dict, id_list, id_list)
        #note here we give the filename as a single string
        #to confirm that works too (convenience feature).
        rec_dict = SeqIO.index_db(index_tmp, filename, format,
                                  alphabet)
        self.check_dict_methods(rec_dict, id_list, id_list)

src/b/i/biopython-HEAD/Tests/test_SeqIO_index.py   biopython(Download)
        def test_old(self):
            """Load existing index with no options."""
            d = SeqIO.index_db("Roche/triple_sff.idx")
            self.assertEqual(54, len(d))
 
        def test_old_format(self):
            """Load existing index with correct format."""
            d = SeqIO.index_db("Roche/triple_sff.idx", format="sff")
        def test_old_files(self):
            """Load existing index with correct files."""
            d = SeqIO.index_db("Roche/triple_sff.idx",
                               ["E3MFGYR02_no_manifest.sff", "greek.sff", "paired.sff"])
            self.assertEqual(54, len(d))
        #In memory,
        #note here give filenames as list of strings
        rec_dict = SeqIO.index_db(":memory:", [filename], format,
                                  alphabet)
        self.check_dict_methods(rec_dict, id_list, id_list)
        #note here we give the filename as a single string
        #to confirm that works too (convenience feature).
        rec_dict = SeqIO.index_db(index_tmp, filename, format,
                                  alphabet)
        self.check_dict_methods(rec_dict, id_list, id_list)