#!/usr/bin/env python # File created on 4 June 2010 from __future__ import division __author__ = "Justin Kuczynski" __copyright__ = "Copyright 2011, The QIIME Project" __credits__ = ["Justin Kuczynski", "Jose Antonio Navas", "Greg Caporaso"] __license__ = "GPL" __version__ = "1.8.0-dev" __maintainer__ = "Justin Kuczynski" __email__ = "justinak@gmail.com" from qiime.util import make_option import os from numpy import array import warnings warnings.filterwarnings('ignore', 'Not using MPI as mpi4py not found') from cogent.maths.unifrac.fast_unifrac import fast_unifrac_permutations_file, TEST_ON_PAIRWISE, TEST_ON_TREE, TEST_ON_ENVS from cogent.maths.unifrac.fast_unifrac import fast_p_test_file from qiime.util import parse_command_line_parameters from qiime.format import format_unifrac_sample_mapping from biom.parse import parse_biom_table script_info = {} script_info[ 'brief_description'] = "This script runs any of a set of common tests to determine if a sample is statistically significantly different from another sample" script_info[ 'script_description'] = "The tests are conducted on each pair of samples present in the input otu table. See the unifrac tutorial online for more details (http://bmf2.colorado.edu/unifrac/tutorial.psp)" script_info['script_usage'] = [] script_info['script_usage'].append( ("Example:", "Perform 100 randomizations of sample/sequence assignments, and record the probability that sample 1 is phylogenetically different from sample 2, using the unifrac monte carlo significance test. The test is run for all pairs of samples.", "%prog -i otu_table.biom -t rep_set.tre -s unweighted_unifrac -o unw_sig.txt")) script_info[ 'output_description'] = "The script outputs a tab delimited text file with each pair of samples and a p value representing the probability that a random sample/sequence assignment will result in more dissimilar samples than the actual pair of samples." script_info['required_options'] = [ make_option('-i', '--input_path', type='existing_filepath', help='input otu table in biom format'), make_option('-o', '--output_path', type='new_filepath', help='output results path'), make_option('-s', '--significance_test', type='choice', choices=[ 'unweighted_unifrac', 'weighted_unifrac', 'weighted_normalized_unifrac', 'p-test'], help="significance test to use, options are 'unweighted_unifrac', 'weighted_unifrac', 'weighted_normalized_unifrac', or 'p-test'"), make_option( '-t', '--tree_path', type='existing_filepath', help='path to newick tree file'), ] script_info['optional_options'] = [ make_option('-n', '--num_iters', default=100, type="int", help='number of monte carlo randomizations [default: %default]'), make_option('-k', '--type_of_test', type='choice', choices=['all_together', 'each_pair', 'each_sample'], default='each_pair', help="type of significance test to perform, options are 'all_together', 'each_pair' or 'each_sample'. [default: %default]"), ] script_info['version'] = __version__ def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) otu_table_fp = opts.input_path otu_table = parse_biom_table(open(otu_table_fp, 'U')) sample_ids = otu_table.SampleIds otu_ids = otu_table.ObservationIds # This is not memory safe: need to be able to load the otu table as ints otu_table_array = array(list(otu_table.iterObservationData()), dtype='int') if opts.type_of_test == 'all_together': type_of_test = TEST_ON_TREE header_text = "sample\tp value\tp value (Bonferroni corrected)\n" elif opts.type_of_test == 'each_pair': type_of_test = TEST_ON_PAIRWISE header_text = "sample 1\tsample 2\tp value\tp value (Bonferroni corrected)\n" elif opts.type_of_test == 'each_sample': type_of_test = TEST_ON_ENVS header_text = "sample\tp value\tp value (Bonferroni corrected)\n" if opts.significance_test == 'p-test': raise RuntimeError( 'significance test type "each_sample" not allowed for p-test') else: raise RuntimeError('significance test type "%s" not found' % opts.type_of_test) # note, uses ugly temp file if opts.significance_test == 'unweighted_unifrac': tree_in = open(opts.tree_path, 'U') output_fp = opts.output_path + '_envs.tmp' result = format_unifrac_sample_mapping( sample_ids, otu_ids, otu_table_array) of = open(output_fp, 'w') of.write('\n'.join(result)) of.close() envs_in = open(output_fp, 'U') result = fast_unifrac_permutations_file(tree_in, envs_in, weighted=False, num_iters=opts.num_iters, verbose=opts.verbose, test_on=type_of_test) envs_in.close() os.remove(output_fp) of = open(opts.output_path, 'w') of.write("#unweighted unifrac significance test\n") of.write(header_text) for line in result: of.write('\t'.join(map(str, line)) + '\n') of.close() elif opts.significance_test == 'p-test': tree_in = open(opts.tree_path, 'U') output_fp = opts.output_path + '_envs.tmp' result = format_unifrac_sample_mapping( sample_ids, otu_ids, otu_table_array) of = open(output_fp, 'w') of.write('\n'.join(result)) of.close() envs_in = open(output_fp, 'U') result = fast_p_test_file(tree_in, envs_in, num_iters=opts.num_iters, verbose=opts.verbose, test_on=type_of_test) envs_in.close() os.remove(output_fp) of = open(opts.output_path, 'w') of.write( "#andy martin's p-test significance test\n") of.write(header_text) for line in result: of.write('\t'.join(map(str, line)) + '\n') of.close() elif opts.significance_test == 'weighted_unifrac': tree_in = open(opts.tree_path, 'U') output_fp = opts.output_path + '_envs.tmp' result = format_unifrac_sample_mapping( sample_ids, otu_ids, otu_table_array) of = open(output_fp, 'w') of.write('\n'.join(result)) of.close() envs_in = open(output_fp, 'U') result = fast_unifrac_permutations_file(tree_in, envs_in, weighted=True, num_iters=opts.num_iters, verbose=opts.verbose, test_on=type_of_test) envs_in.close() os.remove(output_fp) of = open(opts.output_path, 'w') of.write( "#weighted unifrac significance test\n") of.write(header_text) for line in result: of.write('\t'.join(map(str, line)) + '\n') of.close() elif opts.significance_test == 'weighted_normalized_unifrac': tree_in = open(opts.tree_path, 'U') output_fp = opts.output_path + '_envs.tmp' result = format_unifrac_sample_mapping( sample_ids, otu_ids, otu_table_array) of = open(output_fp, 'w') of.write('\n'.join(result)) of.close() envs_in = open(output_fp, 'U') result = fast_unifrac_permutations_file(tree_in, envs_in, weighted='correct', num_iters=opts.num_iters, verbose=opts.verbose, test_on=type_of_test) envs_in.close() os.remove(output_fp) of = open(opts.output_path, 'w') of.write( "#weighted normalized unifrac significance test\n") of.write( "sample 1\tsample 2\tp value\tp value (Bonferroni corrected)\n") for line in result: of.write('\t'.join(map(str, line)) + '\n') of.close() else: raise RuntimeError('significance test "%s" not found' % opts.significance_test) if __name__ == "__main__": main()