Did I find the right examples for you? yes no      Crawl my project      Python Jobs

All Samples(41)  |  Call(41)  |  Derive(0)  |  Import(0)
Indexes a sequence file and returns a dictionary-like object.

 - filename - string giving name of file to be indexed
 - format   - lower case string describing the file format
 - alphabet - optional Alphabet object, useful when the sequence type
              cannot be automatically inferred from the file itself
              (e.g. format="fasta" or "tab")
 - key_function - Optional callback function which when given a
              SeqRecord identifier string should return a unique
              key for the dictionary.(more...)

        def index(filename, format, alphabet=None, key_function=None):
            """Indexes a sequence file and returns a dictionary-like object.

             - filename - string giving name of file to be indexed
             - format   - lower case string describing the file format
             - alphabet - optional Alphabet object, useful when the sequence type
                          cannot be automatically inferred from the file itself
                          (e.g. format="fasta" or "tab")
             - key_function - Optional callback function which when given a
                          SeqRecord identifier string should return a unique
                          key for the dictionary.

            Raises TypeError if filename or format is not a string, and
            ValueError if format is empty, is not lower case, is not a format
            supported for indexing, or if alphabet is given but is not an
            Alphabet or AlphabetEncoder object.

            This indexing function will return a dictionary-like object, giving the
            SeqRecord objects as values:

            >>> from Bio import SeqIO
            >>> records = SeqIO.index("Quality/example.fastq", "fastq")
            >>> len(records)
            3
            >>> sorted(records)
            ['EAS54_6_R1_2_1_413_324', 'EAS54_6_R1_2_1_443_348', 'EAS54_6_R1_2_1_540_792']
            >>> print(records["EAS54_6_R1_2_1_540_792"].format("fasta"))
            >EAS54_6_R1_2_1_540_792
            TTGGCAGGCCAAGGCCGATGGATCA
            <BLANKLINE>
            >>> "EAS54_6_R1_2_1_540_792" in records
            True
            >>> print(records.get("Missing", None))
            None

            If the file is BGZF compressed, this is detected automatically. Ordinary
            GZIP files are not supported:

            >>> from Bio import SeqIO
            >>> records = SeqIO.index("Quality/example.fastq.bgz", "fastq")
            >>> len(records)
            3
            >>> print(records["EAS54_6_R1_2_1_540_792"].seq)
            TTGGCAGGCCAAGGCCGATGGATCA

            Note that this pseudo dictionary will not support all the methods of a
            true Python dictionary, for example values() is not defined since this
            would require loading all of the records into memory at once.

            When you call the index function, it will scan through the file, noting
            the location of each record. When you access a particular record via the
            dictionary methods, the code will jump to the appropriate part of the
            file and then parse that section into a SeqRecord.

            Note that not all the input formats supported by Bio.SeqIO can be used
            with this index function. It is designed to work only with sequential
            file formats (e.g. "fasta", "gb", "fastq") and is not suitable for any
            interlaced file format (e.g. alignment formats such as "clustal").

            For small files, it may be more efficient to use an in memory Python
            dictionary, e.g.

            >>> from Bio import SeqIO
            >>> records = SeqIO.to_dict(SeqIO.parse(open("Quality/example.fastq"), "fastq"))
            >>> len(records)
            3
            >>> sorted(records)
            ['EAS54_6_R1_2_1_413_324', 'EAS54_6_R1_2_1_443_348', 'EAS54_6_R1_2_1_540_792']
            >>> print(records["EAS54_6_R1_2_1_540_792"].format("fasta"))
            >EAS54_6_R1_2_1_540_792
            TTGGCAGGCCAAGGCCGATGGATCA
            <BLANKLINE>

            As with the to_dict() function, by default the id string of each record
            is used as the key. You can specify a callback function to transform
            this (the record identifier string) into your preferred key. For example:

            >>> from Bio import SeqIO
            >>> def make_tuple(identifier):
            ...     parts = identifier.split("_")
            ...     return int(parts[-2]), int(parts[-1])
            >>> records = SeqIO.index("Quality/example.fastq", "fastq",
            ...                       key_function=make_tuple)
            >>> len(records)
            3
            >>> sorted(records)
            [(413, 324), (443, 348), (540, 792)]
            >>> print(records[(540, 792)].format("fasta"))
            >EAS54_6_R1_2_1_540_792
            TTGGCAGGCCAAGGCCGATGGATCA
            <BLANKLINE>
            >>> (540, 792) in records
            True
            >>> "EAS54_6_R1_2_1_540_792" in records
            False
            >>> print(records.get("Missing", None))
            None

            Another common use case would be indexing an NCBI style FASTA file,
            where you might want to extract the GI number from the FASTA identifier
            to use as the dictionary key.

            Notice that unlike the to_dict() function, here the key_function does
            not get given the full SeqRecord to use to generate the key. Doing so
            would impose a severe performance penalty as it would require the file
            to be completely parsed while building the index. Right now this is
            usually avoided.

            See also: Bio.SeqIO.index_db() and Bio.SeqIO.to_dict()
            """
            #Try and give helpful error messages:
            if not isinstance(filename, basestring):
                raise TypeError("Need a filename (not a handle)")
            if not isinstance(format, basestring):
                raise TypeError("Need a string for the file format (lower case)")
            if not format:
                raise ValueError("Format required (lower case string)")
            if format != format.lower():
                raise ValueError("Format string '%s' should be lower case" % format)
            if alphabet is not None and not (isinstance(alphabet, Alphabet) or
                                             isinstance(alphabet, AlphabetEncoder)):
                raise ValueError("Invalid alphabet, %s" % repr(alphabet))

            #Map the file format to a sequence iterator:
            from ._index import _FormatToRandomAccess # Lazy import
            from Bio.File import _IndexedSeqFileDict
            try:
                proxy_class = _FormatToRandomAccess[format]
            except KeyError:
                raise ValueError("Unsupported format %r" % format)
            # NOTE: this local must not be called "repr". Assigning to that name
            # anywhere in the function makes it local throughout, so the
            # repr(alphabet) call in the validation above would raise
            # UnboundLocalError instead of the intended ValueError.
            description = "SeqIO.index(%r, %r, alphabet=%r, key_function=%r)" \
                % (filename, format, alphabet, key_function)
            return _IndexedSeqFileDict(proxy_class(filename, format, alphabet),
                                       key_function, description, "SeqRecord")
        


src/b/i/biopython-1.63/Doc/examples/fasta_dictionary.py   biopython(Download)
    return gb_name[:-2]
 
orchid_dict = SeqIO.index("ls_orchid.fasta", "fasta", generic_dna)
 
for id_num in orchid_dict:

src/b/i/biopython-HEAD/Doc/examples/fasta_dictionary.py   biopython(Download)
    return gb_name[:-2]
 
orchid_dict = SeqIO.index("ls_orchid.fasta", "fasta", generic_dna)
 
for id_num in orchid_dict:

src/j/c/jcvi-HEAD/formats/fasta.py   jcvi(Download)
 
        if index:
            self.index = SeqIO.index(filename, "fasta",
                    key_function=key_function)
        else:
    f = Fasta(fastafile)
    if qualfile:
        q = SeqIO.index(qualfile, "qual")
 
    all_keys = list(f.iterkeys())

src/p/i/picobio-HEAD/assembly_comparison/assembly_comparison.py   picobio(Download)
    do_blast(assembly_fasta, reference_fasta, blast_file)
 
contigs = SeqIO.index(assembly_fasta, "fasta")
blast_results = SearchIO.index(blast_file, "blast-tab")
 

src/s/e/seqmagick-0.5.0/seqmagick/transform.py   seqmagick(Download)
        ids = [seq_id for (length, seq_id) in len_and_ids]
    del len_and_ids #free this memory
    record_index = SeqIO.index(source_file.name, source_file_type)
    records = (record_index[seq_id] for seq_id in ids)
 
    if direction == 0:
        ids = reversed(ids)
    record_index = SeqIO.index(source_file.name, source_file_type)
    records = (record_index[id] for id in ids)
 

src/s/e/seqmagick-HEAD/seqmagick/transform.py   seqmagick(Download)
        ids = [seq_id for (length, seq_id) in len_and_ids]
    del len_and_ids #free this memory
    record_index = SeqIO.index(source_file.name, source_file_type)
    records = (record_index[seq_id] for seq_id in ids)
 
    if direction == 0:
        ids = reversed(ids)
    record_index = SeqIO.index(source_file.name, source_file_type)
    records = (record_index[id] for id in ids)
 

src/a/r/ARC-HEAD/contrib/ARC_Call_and_Inject_hets.py   ARC(Download)
    print "Sample: ", sample
 
    contigsidx = SeqIO.index("./finished_%s/contigs.fasta" % sample, "fasta")
 
    #Get a list of targets, use set to get unique targets

src/v/d/vdj-HEAD/refseq.py   vdj(Download)
# process those files
for reference_fasta in files_to_process:
    if ligm_index == None: ligm_index = SeqIO.index( os.path.join(params.imgt_dir,'imgt.dat'), 'imgt')
    group = get_group(reference_fasta)
    reference_imgt = os.path.join(processed_dir,group+'.imgt')

src/p/i/picobio-HEAD/sambam/sam_seq_equals.py   picobio(Download)
    reference = SeqIO.index_db(reference_filename+".idx", reference_filename, "fasta")
except ImportError:
    reference = SeqIO.index(reference_filename, "fasta")
if not reference:
    sys.stderr.write("No sequences found in FASTA reference file %s\n" % reference_filename)

src/a/r/ARC-HEAD/ARC/runners/finisher.py   ARC(Download)
        contig_orientation = {}
        contig_to_isogroup = {}
        contig_idx = SeqIO.index(contigf, "fasta")
        # Parse isotigsf:
        igroup = ""

  1 | 2 | 3  Next