# Copyright 2013 by Nate Sutton.  All rights reserved.
# Based on test_Clustalw_tool.py by Peter Cock.
# Example code used from Biopython's Phylo cookbook by Eric Talevich.
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
 
from __future__ import print_function
 
from Bio import MissingExternalDependencyError
 
import sys
import os
import itertools
from Bio._py3k import StringIO
from Bio._py3k import zip
 
from Bio import SeqIO
from Bio import Phylo
from Bio.Phylo.Applications import FastTreeCommandline
from Bio.Phylo.Applications import _Fasttree
from Bio.Application import ApplicationError
 
#################################################################
 
# Force the "C" locale so that messages from external tools are not
# translated into the operating system's language.
os.environ['LANG'] = 'C'

# Locate the FastTree executable; stays None when nothing usable is found.
fasttree_exe = None
if sys.platform == "win32":
    # The Program Files location can vary depending on the Windows language,
    # so prefer the environment variable and fall back to a common default.
    prog_files = os.environ.get("PROGRAMFILES", r"C:\Program Files (x86)")

    # Only the Program Files folder itself and a "FastTree" sub-folder are
    # probed; users can alter these paths according to where FastTree is
    # located on their systems.
    for subdir in ("", "FastTree"):
        if not os.path.isdir(os.path.join(prog_files, subdir)):
            continue
        for exe_name in ("FastTree.exe",):
            candidate = os.path.join(prog_files, subdir, exe_name)
            if os.path.isfile(candidate):
                fasttree_exe = candidate
                break
        if fasttree_exe:
            break
else:
    from Bio._py3k import getoutput
    # The website uses 'FastTree', but some systems install it as 'fasttree'.
    for candidate in ("FastTree", "fasttree"):
        # Run the candidate with -help and inspect what it prints.
        help_text = getoutput("%s -help" % candidate)
        # The "is not recognized" message may be in another language, so also
        # require strings that appear in the real FastTree help output.
        looks_like_fasttree = (
            "is not recognized" not in help_text
            and "protein_alignment" in help_text
            and "nucleotide_alignment" in help_text
        )
        if looks_like_fasttree:
            fasttree_exe = candidate
            break

if not fasttree_exe:
    raise MissingExternalDependencyError(
        "Install FastTree and correctly set the file path to the program "
        "if you want to use it from Biopython.")
 
#################################################################

print("Checking error conditions")
print("=========================")

# Case 1: the input path does not exist, so running the command line
# wrapper is expected to raise ApplicationError.
print("Empty file")
input_file = "does_not_exist.fasta"
assert not os.path.isfile(input_file)
cline = FastTreeCommandline(fasttree_exe, input=input_file)
try:
    stdout, stderr = cline()
except ApplicationError as err:
    print("Failed (good)")
    # Accept any of the known wordings of the failure message.
    message = str(err)
    assert ("Cannot open sequence file" in message
            or "Cannot open input file" in message
            or "Non-zero return code " in message), message
else:
    assert False, "Should have failed, returned:\n%s\n%s" % (stdout, stderr)

# Case 2: a FASTA file holding a single record.  Either the tool raises
# ApplicationError, or it runs and reports "Unique: 1/1" on stderr; both
# outcomes are treated as the expected failure.
print("")
print("Single sequence")
input_file = "Fasta/f001"
assert os.path.isfile(input_file)
assert len(list(SeqIO.parse(input_file, "fasta"))) == 1
cline = FastTreeCommandline(fasttree_exe, input=input_file)
try:
    stdout, stderr = cline()
except ApplicationError as err:
    print("Failed (good)")
else:
    if "Unique: 1/1" not in stderr:
        assert False, "Should have failed, returned:\n%s\n%s" % (stdout, stderr)
    else:
        print("Failed (good)")

# Case 3: an existing file that is not a sequence alignment at all.
print("")
print("Invalid sequence")
input_file = "Medline/pubmed_result1.txt"
assert os.path.isfile(input_file)
cline = FastTreeCommandline(fasttree_exe, input=input_file)
try:
    stdout, stderr = cline()
except ApplicationError as err:
    print("Failed (good)")
    # Ideally we'd catch the return code and raise a specific error for
    # "invalid format"; for now accept any of the known failure messages.
    message = str(err)
    assert ("invalid format" in message
            or "not produced" in message
            or "No sequences in file" in message
            or "Non-zero return code " in message), message
else:
    assert False, "Should have failed, returned:\n%s\n%s" % (stdout, stderr)
 
#################################################################
print("")
print("Checking normal situations")
print("==========================")


def lookup_by_names(tree):
    """Return a dict mapping each clade name in ``tree`` to its clade.

    Clades without a name are skipped.

    Raises:
        ValueError: if two clades share the same name.
    """
    names = {}
    for clade in tree.find_clades():
        if clade.name:
            if clade.name in names:
                raise ValueError("Duplicate key: %s" % clade.name)
            names[clade.name] = clade
    return names


def terminal_neighbor_dists(tree):
    """Return a list of distances between adjacent terminals of ``tree``.

    Terminals are taken in the order given by
    ``tree.find_clades(terminal=True)``; a tree with fewer than two
    terminals gives an empty list.
    """
    def generate_pairs(iterable):
        """Yield consecutive (item, next_item) pairs from ``iterable``."""
        first, second = itertools.tee(iterable)
        # Advance the second iterator one step; the default stops an empty
        # iterable from raising StopIteration here.
        next(second, None)
        return zip(first, second)

    return [tree.distance(*pair)
            for pair in generate_pairs(tree.find_clades(terminal=True))]


#Create a temp fasta file with a space in the name
temp_filename_with_spaces = "Clustalw/temp horses.fasta"
try:
    # The context manager closes the handle even if the conversion fails.
    with open(temp_filename_with_spaces, "w") as handle:
        SeqIO.write(SeqIO.parse("Phylip/hennigian.phy", "phylip"),
                    handle, "fasta")

    for input_file in ["Quality/example.fasta", temp_filename_with_spaces]:
        input_records = SeqIO.to_dict(SeqIO.parse(input_file, "fasta"))
        print("")
        print("Calling fasttree on %s (with %i records)"
              % (repr(input_file), len(input_records)))

        #Any filenames with spaces should get escaped with quotes
        #automatically.  Using keyword arguments here.
        cline = _Fasttree.FastTreeCommandline(fasttree_exe, input=input_file,
                                              nt=True)
        # Round-trip check: eval() is only applied to the repr of the
        # command line object built just above, never to external input.
        assert str(eval(repr(cline))) == str(cline)

        out, err = cline()
        assert err.strip().startswith("FastTree")

        print("")
        print("Checking generation of tree terminals")
        tree = Phylo.read(StringIO(out), 'newick')
        names = lookup_by_names(tree)
        assert len(names) > 0
        print("Success")

        print("")
        print("Checking distances between tree terminals")
        for dist in terminal_neighbor_dists(tree):
            assert dist > 0.0
        print("Success")
finally:
    # Do not leave the temporary alignment behind, whether or not the
    # checks above passed.
    if os.path.isfile(temp_filename_with_spaces):
        os.remove(temp_filename_with_spaces)

print("")
print("Done")