Did I find the right examples for you? yes no      Crawl my project      Python Jobs

All Samples(52)  |  Call(52)  |  Derive(0)  |  Import(0)
Turns a sequence iterator or list into a dictionary.

 - sequences  - An iterator that returns SeqRecord objects,
                or simply a list of SeqRecord objects.
 - key_function - Optional callback function which when given a
                SeqRecord should return a unique key for the dictionary.

e.g. key_function = lambda rec : rec.name
or,  key_function = lambda rec : rec.description.split()[0]
(more...)

        def to_dict(sequences, key_function=None):
            """Turns a sequence iterator or list into a dictionary.

             - sequences  - An iterator that returns SeqRecord objects,
                            or simply a list of SeqRecord objects.
             - key_function - Optional callback function which when given a
                            SeqRecord should return a unique key for the dictionary.

            e.g. key_function = lambda rec : rec.name
            or,  key_function = lambda rec : rec.description.split()[0]

            If key_function is omitted then record.id is used, on the assumption
            that the record objects returned are SeqRecords with a unique id.

            Returns a dictionary mapping each key to its record.

            If there are duplicate keys, a ValueError is raised. This holds for
            any hashable key, including tuples.

            Example usage, defaulting to using the record.id as key:

            >>> from Bio import SeqIO
            >>> filename = "GenBank/cor6_6.gb"
            >>> format = "genbank"
            >>> id_dict = SeqIO.to_dict(SeqIO.parse(filename, format))
            >>> print(sorted(id_dict))
            ['AF297471.1', 'AJ237582.1', 'L31939.1', 'M81224.1', 'X55053.1', 'X62281.1']
            >>> print(id_dict["L31939.1"].description)
            Brassica rapa (clone bif72) kin mRNA, complete cds.

            A more complex example, using the key_function argument in order to
            use a sequence checksum as the dictionary key:

            >>> from Bio import SeqIO
            >>> from Bio.SeqUtils.CheckSum import seguid
            >>> filename = "GenBank/cor6_6.gb"
            >>> format = "genbank"
            >>> seguid_dict = SeqIO.to_dict(SeqIO.parse(filename, format),
            ...               key_function = lambda rec : seguid(rec.seq))
            >>> for key, record in sorted(seguid_dict.items()):
            ...     print("%s %s" % (key, record.id))
            /wQvmrl87QWcm9llO4/efg23Vgg AJ237582.1
            BUg6YxXSKWEcFFH0L08JzaLGhQs L31939.1
            SabZaA4V2eLE9/2Fm5FnyYy07J4 X55053.1
            TtWsXo45S3ZclIBy4X/WJc39+CY M81224.1
            l7gjJFE6W/S1jJn5+1ASrUKW/FA X62281.1
            uVEYeAQSV5EDQOnFoeMmVea+Oow AF297471.1

            This approach is not suitable for very large sets of sequences, as all
            the SeqRecord objects are held in memory. Instead, consider using the
            Bio.SeqIO.index() function (if it supports your particular file format).
            """
            def _record_id(rec):
                """Default key: the record's id attribute."""
                return rec.id

            if key_function is None:
                key_function = _record_id

            d = {}
            for record in sequences:
                key = key_function(record)
                if key in d:
                    # Wrap the key in a 1-tuple: a bare tuple key would otherwise
                    # be unpacked as the format arguments and raise TypeError
                    # (or be misreported) instead of the intended ValueError.
                    raise ValueError("Duplicate key '%s'" % (key,))
                d[key] = record
            return d
        


src/b/i/biopython-1.63/Doc/examples/fasta_dictionary.py   biopython(Download)
 
rec_iterator = SeqIO.parse("ls_orchid.fasta", "fasta", generic_dna)
orchid_dict = SeqIO.to_dict(rec_iterator, get_accession_num)
 
for id_num in orchid_dict:

src/b/i/biopython-HEAD/Doc/examples/fasta_dictionary.py   biopython(Download)
 
rec_iterator = SeqIO.parse("ls_orchid.fasta", "fasta", generic_dna)
orchid_dict = SeqIO.to_dict(rec_iterator, get_accession_num)
 
for id_num in orchid_dict:

src/j/c/jcvi-HEAD/formats/fasta.py   jcvi(Download)
            _key_function = (lambda rec: key_function(rec.description)) if \
                    key_function else None
            self.index = SeqIO.to_dict(SeqIO.parse(must_open(filename), "fasta"),
                    key_function=_key_function)
 

src/b/c/bcbb-HEAD/gff/Scripts/gff/gff_to_genbank.py   bcbb(Download)
def main(gff_file, fasta_file):
    """Write the records parsed from a GFF file out as a GenBank file.

    The output path is ``gff_file`` with its extension replaced by ``.gb``.
    The sequences in ``fasta_file`` are loaded into an in-memory dictionary
    and handed to the GFF parser alongside the GFF file.
    """
    base_name = os.path.splitext(gff_file)[0]
    out_file = "%s.gb" % base_name
    fasta_records = SeqIO.parse(fasta_file, "fasta", generic_dna)
    fasta_input = SeqIO.to_dict(fasta_records)
    # Parsed records pass through the NCBI id fix, then the GFF check,
    # before being written.
    records = _check_gff(_fix_ncbi_id(GFF.parse(gff_file, fasta_input)))
    SeqIO.write(records, out_file, "genbank")

src/b/c/bcbb-HEAD/biopython/glimmergff_to_proteins.py   bcbb(Download)
def main(glimmer_file, ref_file):
    with open(ref_file) as in_handle:
        ref_recs = SeqIO.to_dict(SeqIO.parse(in_handle, "fasta"))
 
    base, ext = os.path.splitext(glimmer_file)

src/b/i/biopython-1.63/Bio/SCOP/__init__.py   biopython(Download)
        if astral_file:
            #Build a dictionary of SeqRecord objects in the FASTA file, IN MEMORY
            self.fasta_dict = SeqIO.to_dict(SeqIO.parse(astral_file, "fasta"))
 
        self.astral_file = astral_file

src/b/i/biopython-HEAD/Bio/SCOP/__init__.py   biopython(Download)
        if astral_file:
            #Build a dictionary of SeqRecord objects in the FASTA file, IN MEMORY
            self.fasta_dict = SeqIO.to_dict(SeqIO.parse(astral_file, "fasta"))
 
        self.astral_file = astral_file

src/b/c/bcbb-HEAD/gff/Scripts/gff/gff_to_biosql.py   bcbb(Download)
    print "Parsing FASTA sequence file..."
    with open(seq_file) as seq_handle:
        seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
 
    print "Parsing GFF data file..."

src/c/l/CladeCompare-0.2/cladecomparelib/core.py   CladeCompare(Download)
        dict((rec.id, str(rec.seq))
             for rec in SeqIO.parse(StringIO(out), 'fasta')))
    ref_record = SeqIO.to_dict(seqs)[ref_id]
    # Calculate aligned residue numbers & insert ranges
    offset = (ref_record.annotations['start']
        head_lengths[seq['id']] = seq['head_len']
    ref_id, ref_aln = choose_best_aligned(hits)
    ref_record = SeqIO.to_dict(seqs)[ref_id]
    offset = (ref_record.annotations['start']
              + head_lengths[ref_id]

src/m/s/msg-HEAD/extract-ref-alleles.py   msg(Download)
            else:
                return open(path)
        ref_seqs = dict(par1=SeqIO.to_dict(SeqIO.parse(open_file(self.options.parent1),"fasta")), 
            par2=SeqIO.to_dict(SeqIO.parse(open_file(self.options.parent2),"fasta")))
 

  1 | 2 | 3  Next